From 9a18664506dbce5e23f3c5de7b1c5a042dd26520 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Thu, 23 Jun 2005 21:29:18 +1000
Subject: drm: 32/64-bit DRM ioctl compatibility patch

The patch is against a 2.6.11 kernel tree.  I am running this with a
32-bit X server (compiled up from X.org CVS as of a couple of weeks
ago) and 32-bit DRI libraries and clients.  All the userland stuff is
identical to what I am using under a 32-bit kernel on my G4 powerbook
(which is a 32-bit machine of course).  I haven't tried compiling up a
64-bit X server or clients yet.

In the compatibility routines I have assumed that the kernel can
safely access user addresses after set_fs(KERNEL_DS).  That is, where
an ioctl argument structure contains pointers to other structures, and
those other structures are already compatible between the 32-bit and
64-bit ABIs (i.e. they only contain things like chars, shorts or
ints), I just check the address with access_ok() and then pass it
through to the 64-bit ioctl code.  I believe this approach may not
work on sparc64, but it does work on ppc64 and x86_64 at least.

One tricky area which may need to be revisited is the question of how
to handle the handles which we pass back to userspace to identify
mappings.  These handles are generated in the ADDMAP ioctl and then
passed in as the offset value to mmap.  However, offset values for
mmap seem to be generated in other ways as well, particularly for AGP
mappings.  The approach I have ended up with is to generate a fake
32-bit handle only for _DRM_SHM mappings.  The handles for other
mappings (AGP, REG, FB) are physical addresses which are already
limited to 32 bits, and generating fake handles for them created all
sorts of problems in the mmap/nopage code.

This patch has been updated to use the new compatibility ioctls.
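All of the handlers added in drm_ioc32.c below follow the same
marshalling shape.  As a minimal sketch (illustrative only:
drm_foo32_t, drm_foo_t and DRM_IOCTL_FOO are placeholder names
standing in for any of the real structure/ioctl pairs in the patch),
the pattern is:

/* Placeholder 32-bit layout, as seen by a 32-bit client. */
typedef struct drm_foo32 {
	u32 data;		/* user pointer stored as a u32 */
	int count;
} drm_foo32_t;

/* Placeholder native (64-bit) layout expected by the existing ioctl. */
typedef struct drm_foo {
	void __user *data;
	int count;
} drm_foo_t;

static int compat_drm_foo(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	drm_foo32_t f32;
	drm_foo_t __user *f;
	int err;

	/* 1. Copy the 32-bit layout in from userspace. */
	if (copy_from_user(&f32, (void __user *) arg, sizeof(f32)))
		return -EFAULT;

	/* 2. Build the native layout in compat_alloc_user_space(). */
	f = compat_alloc_user_space(sizeof(*f));
	if (!access_ok(VERIFY_WRITE, f, sizeof(*f))
	    || __put_user((void __user *)(unsigned long) f32.data, &f->data)
	    || __put_user(f32.count, &f->count))
		return -EFAULT;

	/* 3. Hand it to the unchanged 64-bit ioctl implementation. */
	err = drm_ioctl(file->f_dentry->d_inode, file,
			DRM_IOCTL_FOO, (unsigned long) f);
	if (err)
		return err;

	/* 4. Copy any outputs back into the 32-bit layout. */
	if (__get_user(f32.count, &f->count)
	    || copy_to_user((void __user *) arg, &f32, sizeof(f32)))
		return -EFAULT;
	return 0;
}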
From: Paul Mackerras Signed-off-by: Dave Airlie --- drivers/char/drm/Makefile | 5 + drivers/char/drm/drmP.h | 5 + drivers/char/drm/drm_bufs.c | 25 +- drivers/char/drm/drm_context.c | 6 +- drivers/char/drm/drm_ioc32.c | 1069 +++++++++++++++++++++++++++++++++++++++ drivers/char/drm/radeon_drv.c | 3 + drivers/char/drm/radeon_drv.h | 3 + drivers/char/drm/radeon_ioc32.c | 395 +++++++++++++++ 8 files changed, 1501 insertions(+), 10 deletions(-) create mode 100644 drivers/char/drm/drm_ioc32.c create mode 100644 drivers/char/drm/radeon_ioc32.c diff --git a/drivers/char/drm/Makefile b/drivers/char/drm/Makefile index 23ab26321e9..7444dec40b9 100644 --- a/drivers/char/drm/Makefile +++ b/drivers/char/drm/Makefile @@ -19,6 +19,11 @@ radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o ffb-objs := ffb_drv.o ffb_context.o sis-objs := sis_drv.o sis_ds.o sis_mm.o +ifeq ($(CONFIG_COMPAT),y) +drm-objs += drm_ioc32.o +radeon-objs += radeon_ioc32.o +endif + obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_GAMMA) += gamma.o obj-$(CONFIG_DRM_TDFX) += tdfx.o diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index 21f4c54e1a8..b04ddf12a0f 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -316,6 +316,9 @@ do { \ typedef int drm_ioctl_t( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); +typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd, + unsigned long arg); + typedef struct drm_ioctl_desc { drm_ioctl_t *func; int auth_needed; @@ -775,6 +778,8 @@ extern int drm_version(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); extern int drm_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +extern long drm_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); extern int drm_takedown(drm_device_t * dev); /* Device support (drm_fops.h) */ diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c index 4113bcba67f..3407380b865 100644 --- a/drivers/char/drm/drm_bufs.c +++ b/drivers/char/drm/drm_bufs.c @@ -60,6 +60,15 @@ int drm_order( unsigned long size ) } EXPORT_SYMBOL(drm_order); +#ifdef CONFIG_COMPAT +/* + * Used to allocate 32-bit handles for _DRM_SHM regions + * The 0x10000000 value is chosen to be out of the way of + * FB/register and GART physical addresses. + */ +static unsigned int map32_handle = 0x10000000; +#endif + /** * Ioctl to specify a range of memory that is available for mapping by a non-root process. 
* @@ -187,16 +196,18 @@ int drm_addmap( struct inode *inode, struct file *filp, down(&dev->struct_sem); list_add(&list->head, &dev->maplist->head); +#ifdef CONFIG_COMPAT + /* Assign a 32-bit handle for _DRM_SHM mappings */ + /* We do it here so that dev->struct_sem protects the increment */ + if (map->type == _DRM_SHM) + map->offset = map32_handle += PAGE_SIZE; +#endif up(&dev->struct_sem); if ( copy_to_user( argp, map, sizeof(*map) ) ) return -EFAULT; - if ( map->type != _DRM_SHM ) { - if ( copy_to_user( &argp->handle, - &map->offset, - sizeof(map->offset) ) ) - return -EFAULT; - } + if (copy_to_user(&argp->handle, &map->offset, sizeof(map->offset))) + return -EFAULT; return 0; } @@ -240,7 +251,7 @@ int drm_rmmap(struct inode *inode, struct file *filp, r_list = list_entry(list, drm_map_list_t, head); if(r_list->map && - r_list->map->handle == request.handle && + r_list->map->offset == (unsigned long) request.handle && r_list->map->flags & _DRM_REMOVABLE) break; } diff --git a/drivers/char/drm/drm_context.c b/drivers/char/drm/drm_context.c index f15c86c5787..fdf661f234e 100644 --- a/drivers/char/drm/drm_context.c +++ b/drivers/char/drm/drm_context.c @@ -225,7 +225,7 @@ int drm_getsareactx(struct inode *inode, struct file *filp, map = dev->context_sareas[request.ctx_id]; up(&dev->struct_sem); - request.handle = map->handle; + request.handle = (void *) map->offset; if (copy_to_user(argp, &request, sizeof(request))) return -EFAULT; return 0; @@ -261,8 +261,8 @@ int drm_setsareactx(struct inode *inode, struct file *filp, down(&dev->struct_sem); list_for_each(list, &dev->maplist->head) { r_list = list_entry(list, drm_map_list_t, head); - if(r_list->map && - r_list->map->handle == request.handle) + if (r_list->map + && r_list->map->offset == (unsigned long) request.handle) goto found; } bad: diff --git a/drivers/char/drm/drm_ioc32.c b/drivers/char/drm/drm_ioc32.c new file mode 100644 index 00000000000..8087a963639 --- /dev/null +++ b/drivers/char/drm/drm_ioc32.c @@ -0,0 +1,1069 @@ +/** + * \file drm_ioc32.c + * + * 32-bit ioctl compatibility routines for the DRM. + * + * \author Paul Mackerras + * + * Copyright (C) Paul Mackerras 2005. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include +#include + +#include "drmP.h" +#include "drm_core.h" + +#define DRM_IOCTL_VERSION32 DRM_IOWR(0x00, drm_version32_t) +#define DRM_IOCTL_GET_UNIQUE32 DRM_IOWR(0x01, drm_unique32_t) +#define DRM_IOCTL_GET_MAP32 DRM_IOWR(0x04, drm_map32_t) +#define DRM_IOCTL_GET_CLIENT32 DRM_IOWR(0x05, drm_client32_t) +#define DRM_IOCTL_GET_STATS32 DRM_IOR( 0x06, drm_stats32_t) + +#define DRM_IOCTL_SET_UNIQUE32 DRM_IOW( 0x10, drm_unique32_t) +#define DRM_IOCTL_ADD_MAP32 DRM_IOWR(0x15, drm_map32_t) +#define DRM_IOCTL_ADD_BUFS32 DRM_IOWR(0x16, drm_buf_desc32_t) +#define DRM_IOCTL_MARK_BUFS32 DRM_IOW( 0x17, drm_buf_desc32_t) +#define DRM_IOCTL_INFO_BUFS32 DRM_IOWR(0x18, drm_buf_info32_t) +#define DRM_IOCTL_MAP_BUFS32 DRM_IOWR(0x19, drm_buf_map32_t) +#define DRM_IOCTL_FREE_BUFS32 DRM_IOW( 0x1a, drm_buf_free32_t) + +#define DRM_IOCTL_RM_MAP32 DRM_IOW( 0x1b, drm_map32_t) + +#define DRM_IOCTL_SET_SAREA_CTX32 DRM_IOW( 0x1c, drm_ctx_priv_map32_t) +#define DRM_IOCTL_GET_SAREA_CTX32 DRM_IOWR(0x1d, drm_ctx_priv_map32_t) + +#define DRM_IOCTL_RES_CTX32 DRM_IOWR(0x26, drm_ctx_res32_t) +#define DRM_IOCTL_DMA32 DRM_IOWR(0x29, drm_dma32_t) + +#define DRM_IOCTL_AGP_ENABLE32 DRM_IOW( 0x32, drm_agp_mode32_t) +#define DRM_IOCTL_AGP_INFO32 DRM_IOR( 0x33, drm_agp_info32_t) +#define DRM_IOCTL_AGP_ALLOC32 DRM_IOWR(0x34, drm_agp_buffer32_t) +#define DRM_IOCTL_AGP_FREE32 DRM_IOW( 0x35, drm_agp_buffer32_t) +#define DRM_IOCTL_AGP_BIND32 DRM_IOW( 0x36, drm_agp_binding32_t) +#define DRM_IOCTL_AGP_UNBIND32 DRM_IOW( 0x37, drm_agp_binding32_t) + +#define DRM_IOCTL_SG_ALLOC32 DRM_IOW( 0x38, drm_scatter_gather32_t) +#define DRM_IOCTL_SG_FREE32 DRM_IOW( 0x39, drm_scatter_gather32_t) + +#define DRM_IOCTL_WAIT_VBLANK32 DRM_IOWR(0x3a, drm_wait_vblank32_t) + +typedef struct drm_version_32 { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel;/**< Patch level */ + u32 name_len; /**< Length of name buffer */ + u32 name; /**< Name of driver */ + u32 date_len; /**< Length of date buffer */ + u32 date; /**< User-space buffer to hold date */ + u32 desc_len; /**< Length of desc buffer */ + u32 desc; /**< User-space buffer to hold desc */ +} drm_version32_t; + +static int compat_drm_version(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_version32_t v32; + drm_version_t __user *version; + int err; + + if (copy_from_user(&v32, (void __user *) arg, sizeof(v32))) + return -EFAULT; + + version = compat_alloc_user_space(sizeof(*version)); + if (!access_ok(VERIFY_WRITE, version, sizeof(*version))) + return -EFAULT; + if (__put_user(v32.name_len, &version->name_len) + || __put_user((void __user *)(unsigned long)v32.name, + &version->name) + || __put_user(v32.date_len, &version->date_len) + || __put_user((void __user *)(unsigned long)v32.date, + &version->date) + || __put_user(v32.desc_len, &version->desc_len) + || __put_user((void __user *)(unsigned long)v32.desc, + &version->desc)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_VERSION, (unsigned long) version); + if (err) + return err; + + if (__get_user(v32.version_major, &version->version_major) + || __get_user(v32.version_minor, &version->version_minor) + || __get_user(v32.version_patchlevel, &version->version_patchlevel) + || __get_user(v32.name_len, &version->name_len) + || __get_user(v32.date_len, &version->date_len) + || __get_user(v32.desc_len, &version->desc_len)) + return -EFAULT; + + if (copy_to_user((void __user *) arg, &v32, sizeof(v32))) + return -EFAULT; + return 0; +} + 
+typedef struct drm_unique32 { + u32 unique_len; /**< Length of unique */ + u32 unique; /**< Unique name for driver instantiation */ +} drm_unique32_t; + +static int compat_drm_getunique(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_unique32_t uq32; + drm_unique_t __user *u; + int err; + + if (copy_from_user(&uq32, (void __user *) arg, sizeof(uq32))) + return -EFAULT; + + u = compat_alloc_user_space(sizeof(*u)); + if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + return -EFAULT; + if (__put_user(uq32.unique_len, &u->unique_len) + || __put_user((void __user *)(unsigned long) uq32.unique, + &u->unique)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_GET_UNIQUE, (unsigned long) u); + if (err) + return err; + + if (__get_user(uq32.unique_len, &u->unique_len)) + return -EFAULT; + if (copy_to_user((void __user *) arg, &uq32, sizeof(uq32))) + return -EFAULT; + return 0; +} + +static int compat_drm_setunique(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_unique32_t uq32; + drm_unique_t __user *u; + + if (copy_from_user(&uq32, (void __user *) arg, sizeof(uq32))) + return -EFAULT; + + u = compat_alloc_user_space(sizeof(*u)); + if (!access_ok(VERIFY_WRITE, u, sizeof(*u))) + return -EFAULT; + if (__put_user(uq32.unique_len, &u->unique_len) + || __put_user((void __user *)(unsigned long) uq32.unique, + &u->unique)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_SET_UNIQUE, (unsigned long) u); +} + +typedef struct drm_map32 { + u32 offset; /**< Requested physical address (0 for SAREA)*/ + u32 size; /**< Requested physical size (bytes) */ + drm_map_type_t type; /**< Type of memory to map */ + drm_map_flags_t flags; /**< Flags */ + u32 handle; /**< User-space: "Handle" to pass to mmap() */ + int mtrr; /**< MTRR slot used */ +} drm_map32_t; + +static int compat_drm_getmap(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_map32_t __user *argp = (void __user *)arg; + drm_map32_t m32; + drm_map_t __user *map; + int idx, err; + void *handle; + + if (get_user(idx, &argp->offset)) + return -EFAULT; + + map = compat_alloc_user_space(sizeof(*map)); + if (!access_ok(VERIFY_WRITE, map, sizeof(*map))) + return -EFAULT; + if (__put_user(idx, &map->offset)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_GET_MAP, (unsigned long) map); + if (err) + return err; + + if (__get_user(m32.offset, &map->offset) + || __get_user(m32.size, &map->size) + || __get_user(m32.type, &map->type) + || __get_user(m32.flags, &map->flags) + || __get_user(handle, &map->handle) + || __get_user(m32.mtrr, &map->mtrr)) + return -EFAULT; + + m32.handle = (unsigned long) handle; + if (copy_to_user(argp, &m32, sizeof(m32))) + return -EFAULT; + return 0; + +} + +static int compat_drm_addmap(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_map32_t __user *argp = (void __user *)arg; + drm_map32_t m32; + drm_map_t __user *map; + int err; + void *handle; + + if (copy_from_user(&m32, argp, sizeof(m32))) + return -EFAULT; + + map = compat_alloc_user_space(sizeof(*map)); + if (!access_ok(VERIFY_WRITE, map, sizeof(*map))) + return -EFAULT; + if (__put_user(m32.offset, &map->offset) + || __put_user(m32.size, &map->size) + || __put_user(m32.type, &map->type) + || __put_user(m32.flags, &map->flags)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_ADD_MAP, (unsigned long) map); + if (err) + return err; + + if (__get_user(m32.offset, &map->offset) + || 
__get_user(m32.mtrr, &map->mtrr) + || __get_user(handle, &map->handle)) + return -EFAULT; + + m32.handle = (unsigned long) handle; + if (m32.handle != (unsigned long) handle && printk_ratelimit()) + printk(KERN_ERR "compat_drm_addmap truncated handle" + " %p for type %d offset %x\n", + handle, m32.type, m32.offset); + + if (copy_to_user(argp, &m32, sizeof(m32))) + return -EFAULT; + + return 0; +} + +static int compat_drm_rmmap(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_map32_t __user *argp = (void __user *)arg; + drm_map_t __user *map; + u32 handle; + + if (get_user(handle, &argp->handle)) + return -EFAULT; + + map = compat_alloc_user_space(sizeof(*map)); + if (!access_ok(VERIFY_WRITE, map, sizeof(*map))) + return -EFAULT; + if (__put_user((void *)(unsigned long) handle, &map->handle)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RM_MAP, (unsigned long) map); +} + +typedef struct drm_client32 { + int idx; /**< Which client desired? */ + int auth; /**< Is client authenticated? */ + u32 pid; /**< Process ID */ + u32 uid; /**< User ID */ + u32 magic; /**< Magic */ + u32 iocs; /**< Ioctl count */ +} drm_client32_t; + +static int compat_drm_getclient(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_client32_t c32; + drm_client32_t __user *argp = (void __user *)arg; + drm_client_t __user *client; + int idx, err; + + if (get_user(idx, &argp->idx)) + return -EFAULT; + + client = compat_alloc_user_space(sizeof(*client)); + if (!access_ok(VERIFY_WRITE, client, sizeof(*client))) + return -EFAULT; + if (__put_user(idx, &client->idx)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_GET_CLIENT, (unsigned long) client); + if (err) + return err; + + if (__get_user(c32.auth, &client->auth) + || __get_user(c32.pid, &client->pid) + || __get_user(c32.uid, &client->uid) + || __get_user(c32.magic, &client->magic) + || __get_user(c32.iocs, &client->iocs)) + return -EFAULT; + + if (copy_to_user(argp, &c32, sizeof(c32))) + return -EFAULT; + return 0; +} + +typedef struct drm_stats32 { + u32 count; + struct { + u32 value; + drm_stat_type_t type; + } data[15]; +} drm_stats32_t; + +static int compat_drm_getstats(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_stats32_t s32; + drm_stats32_t __user *argp = (void __user *)arg; + drm_stats_t __user *stats; + int i, err; + + stats = compat_alloc_user_space(sizeof(*stats)); + if (!access_ok(VERIFY_WRITE, stats, sizeof(*stats))) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_GET_STATS, (unsigned long) stats); + if (err) + return err; + + if (__get_user(s32.count, &stats->count)) + return -EFAULT; + for (i = 0; i < 15; ++i) + if (__get_user(s32.data[i].value, &stats->data[i].value) + || __get_user(s32.data[i].type, &stats->data[i].type)) + return -EFAULT; + + if (copy_to_user(argp, &s32, sizeof(s32))) + return -EFAULT; + return 0; +} + +typedef struct drm_buf_desc32 { + int count; /**< Number of buffers of this size */ + int size; /**< Size in bytes */ + int low_mark; /**< Low water mark */ + int high_mark; /**< High water mark */ + int flags; + u32 agp_start; /**< Start address in the AGP aperture */ +} drm_buf_desc32_t; + +static int compat_drm_addbufs(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_buf_desc32_t __user *argp = (void __user *)arg; + drm_buf_desc_t __user *buf; + int err; + unsigned long agp_start; + + buf = compat_alloc_user_space(sizeof(*buf)); + if (!access_ok(VERIFY_WRITE, 
buf, sizeof(*buf)) + || !access_ok(VERIFY_WRITE, argp, sizeof(*argp))) + return -EFAULT; + + if (__copy_in_user(buf, argp, offsetof(drm_buf_desc32_t, agp_start)) + || __get_user(agp_start, &argp->agp_start) + || __put_user(agp_start, &buf->agp_start)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_ADD_BUFS, (unsigned long) buf); + if (err) + return err; + + if (__copy_in_user(argp, buf, offsetof(drm_buf_desc32_t, agp_start)) + || __get_user(agp_start, &buf->agp_start) + || __put_user(agp_start, &argp->agp_start)) + return -EFAULT; + + return 0; +} + +static int compat_drm_markbufs(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_buf_desc32_t b32; + drm_buf_desc32_t __user *argp = (void __user *)arg; + drm_buf_desc_t __user *buf; + + if (copy_from_user(&b32, argp, sizeof(b32))) + return -EFAULT; + + buf = compat_alloc_user_space(sizeof(*buf)); + if (!access_ok(VERIFY_WRITE, buf, sizeof(*buf))) + return -EFAULT; + + if (__put_user(b32.size, &buf->size) + || __put_user(b32.low_mark, &buf->low_mark) + || __put_user(b32.high_mark, &buf->high_mark)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_MARK_BUFS, (unsigned long) buf); +} + +typedef struct drm_buf_info32 { + int count; /**< Entries in list */ + u32 list; +} drm_buf_info32_t; + +static int compat_drm_infobufs(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_buf_info32_t req32; + drm_buf_info32_t __user *argp = (void __user *)arg; + drm_buf_desc32_t __user *to; + drm_buf_info_t __user *request; + drm_buf_desc_t __user *list; + size_t nbytes; + int i, err; + int count, actual; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + count = req32.count; + to = (drm_buf_desc32_t __user *)(unsigned long) req32.list; + if (count < 0) + count = 0; + if (count > 0 + && !access_ok(VERIFY_WRITE, to, count * sizeof(drm_buf_desc32_t))) + return -EFAULT; + + nbytes = sizeof(*request) + count * sizeof(drm_buf_desc_t); + request = compat_alloc_user_space(nbytes); + if (!access_ok(VERIFY_WRITE, request, nbytes)) + return -EFAULT; + list = (drm_buf_desc_t *) (request + 1); + + if (__put_user(count, &request->count) + || __put_user(list, &request->list)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_INFO_BUFS, (unsigned long) request); + if (err) + return err; + + if (__get_user(actual, &request->count)) + return -EFAULT; + if (count >= actual) + for (i = 0; i < actual; ++i) + if (__copy_in_user(&to[i], &list[i], + offsetof(drm_buf_desc_t, flags))) + return -EFAULT; + + if (__put_user(actual, &argp->count)) + return -EFAULT; + + return 0; +} + +typedef struct drm_buf_pub32 { + int idx; /**< Index into the master buffer list */ + int total; /**< Buffer size */ + int used; /**< Amount of buffer in use (for DMA) */ + u32 address; /**< Address of buffer */ +} drm_buf_pub32_t; + +typedef struct drm_buf_map32 { + int count; /**< Length of the buffer list */ + u32 virtual; /**< Mmap'd area in user-virtual */ + u32 list; /**< Buffer information */ +} drm_buf_map32_t; + +static int compat_drm_mapbufs(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_buf_map32_t __user *argp = (void __user *)arg; + drm_buf_map32_t req32; + drm_buf_pub32_t __user *list32; + drm_buf_map_t __user *request; + drm_buf_pub_t __user *list; + int i, err; + int count, actual; + size_t nbytes; + void __user *addr; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + count = req32.count; + list32 = 
(void __user *)(unsigned long)req32.list; + + if (count < 0) + return -EINVAL; + nbytes = sizeof(*request) + count * sizeof(drm_buf_pub_t); + request = compat_alloc_user_space(nbytes); + if (!access_ok(VERIFY_WRITE, request, nbytes)) + return -EFAULT; + list = (drm_buf_pub_t *) (request + 1); + + if (__put_user(count, &request->count) + || __put_user(list, &request->list)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_MAP_BUFS, (unsigned long) request); + if (err) + return err; + + if (__get_user(actual, &request->count)) + return -EFAULT; + if (count >= actual) + for (i = 0; i < actual; ++i) + if (__copy_in_user(&list32[i], &list[i], + offsetof(drm_buf_pub_t, address)) + || __get_user(addr, &list[i].address) + || __put_user((unsigned long) addr, + &list32[i].address)) + return -EFAULT; + + if (__put_user(actual, &argp->count) + || __get_user(addr, &request->virtual) + || __put_user((unsigned long) addr, &argp->virtual)) + return -EFAULT; + + return 0; +} + +typedef struct drm_buf_free32 { + int count; + u32 list; +} drm_buf_free32_t; + +static int compat_drm_freebufs(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_buf_free32_t req32; + drm_buf_free_t __user *request; + drm_buf_free32_t __user *argp = (void __user *)arg; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request))) + return -EFAULT; + if (__put_user(req32.count, &request->count) + || __put_user((int __user *)(unsigned long) req32.list, + &request->list)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_FREE_BUFS, (unsigned long) request); +} + +typedef struct drm_ctx_priv_map32 { + unsigned int ctx_id; /**< Context requesting private mapping */ + u32 handle; /**< Handle of map */ +} drm_ctx_priv_map32_t; + +static int compat_drm_setsareactx(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_ctx_priv_map32_t req32; + drm_ctx_priv_map_t __user *request; + drm_ctx_priv_map32_t __user *argp = (void __user *)arg; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request))) + return -EFAULT; + if (__put_user(req32.ctx_id, &request->ctx_id) + || __put_user((void *)(unsigned long) req32.handle, + &request->handle)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_SET_SAREA_CTX, (unsigned long) request); +} + +static int compat_drm_getsareactx(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_ctx_priv_map_t __user *request; + drm_ctx_priv_map32_t __user *argp = (void __user *)arg; + int err; + unsigned int ctx_id; + void *handle; + + if (!access_ok(VERIFY_WRITE, argp, sizeof(*argp)) + || __get_user(ctx_id, &argp->ctx_id)) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request))) + return -EFAULT; + if (__put_user(ctx_id, &request->ctx_id)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_GET_SAREA_CTX, (unsigned long) request); + if (err) + return err; + + if (__get_user(handle, &request->handle) + || __put_user((unsigned long) handle, &argp->handle)) + return -EFAULT; + + return 0; +} + +typedef struct drm_ctx_res32 { + int count; + u32 contexts; +} drm_ctx_res32_t; + +static int compat_drm_resctx(struct file *file, unsigned int 
cmd, + unsigned long arg) +{ + drm_ctx_res32_t __user *argp = (void __user *)arg; + drm_ctx_res32_t res32; + drm_ctx_res_t __user *res; + int err; + + if (copy_from_user(&res32, argp, sizeof(res32))) + return -EFAULT; + + res = compat_alloc_user_space(sizeof(*res)); + if (!access_ok(VERIFY_WRITE, res, sizeof(*res))) + return -EFAULT; + if (__put_user(res32.count, &res->count) + || __put_user((drm_ctx_t __user *)(unsigned long) res32.contexts, + &res->contexts)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RES_CTX, (unsigned long) res); + if (err) + return err; + + if (__get_user(res32.count, &res->count) + || __put_user(res32.count, &argp->count)) + return -EFAULT; + + return 0; +} + +typedef struct drm_dma32 { + int context; /**< Context handle */ + int send_count; /**< Number of buffers to send */ + u32 send_indices; /**< List of handles to buffers */ + u32 send_sizes; /**< Lengths of data to send */ + drm_dma_flags_t flags; /**< Flags */ + int request_count; /**< Number of buffers requested */ + int request_size; /**< Desired size for buffers */ + u32 request_indices; /**< Buffer information */ + u32 request_sizes; + int granted_count; /**< Number of buffers granted */ +} drm_dma32_t; + +static int compat_drm_dma(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_dma32_t d32; + drm_dma32_t __user *argp = (void __user *) arg; + drm_dma_t __user *d; + int err; + + if (copy_from_user(&d32, argp, sizeof(d32))) + return -EFAULT; + + d = compat_alloc_user_space(sizeof(*d)); + if (!access_ok(VERIFY_WRITE, d, sizeof(*d))) + return -EFAULT; + + if (__put_user(d32.context, &d->context) + || __put_user(d32.send_count, &d->send_count) + || __put_user((int __user *)(unsigned long) d32.send_indices, + &d->send_indices) + || __put_user((int __user *)(unsigned long) d32.send_sizes, + &d->send_sizes) + || __put_user(d32.flags, &d->flags) + || __put_user(d32.request_count, &d->request_count) + || __put_user((int __user *)(unsigned long) d32.request_indices, + &d->request_indices) + || __put_user((int __user *)(unsigned long) d32.request_sizes, + &d->request_sizes)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_DMA, (unsigned long) d); + if (err) + return err; + + if (__get_user(d32.request_size, &d->request_size) + || __get_user(d32.granted_count, &d->granted_count) + || __put_user(d32.request_size, &argp->request_size) + || __put_user(d32.granted_count, &argp->granted_count)) + return -EFAULT; + + return 0; +} + +#if __OS_HAS_AGP +typedef struct drm_agp_mode32 { + u32 mode; /**< AGP mode */ +} drm_agp_mode32_t; + +static int compat_drm_agp_enable(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_agp_mode32_t __user *argp = (void __user *)arg; + drm_agp_mode32_t m32; + drm_agp_mode_t __user *mode; + + if (get_user(m32.mode, &argp->mode)) + return -EFAULT; + + mode = compat_alloc_user_space(sizeof(*mode)); + if (put_user(m32.mode, &mode->mode)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_AGP_ENABLE, (unsigned long) mode); +} + +typedef struct drm_agp_info32 { + int agp_version_major; + int agp_version_minor; + u32 mode; + u32 aperture_base; /* physical address */ + u32 aperture_size; /* bytes */ + u32 memory_allowed; /* bytes */ + u32 memory_used; + + /* PCI information */ + unsigned short id_vendor; + unsigned short id_device; +} drm_agp_info32_t; + +static int compat_drm_agp_info(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_agp_info32_t 
__user *argp = (void __user *)arg; + drm_agp_info32_t i32; + drm_agp_info_t __user *info; + int err; + + info = compat_alloc_user_space(sizeof(*info)); + if (!access_ok(VERIFY_WRITE, info, sizeof(*info))) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_AGP_INFO, (unsigned long) info); + if (err) + return err; + + if (__get_user(i32.agp_version_major, &info->agp_version_major) + || __get_user(i32.agp_version_minor, &info->agp_version_minor) + || __get_user(i32.mode, &info->mode) + || __get_user(i32.aperture_base, &info->aperture_base) + || __get_user(i32.aperture_size, &info->aperture_size) + || __get_user(i32.memory_allowed, &info->memory_allowed) + || __get_user(i32.memory_used, &info->memory_used) + || __get_user(i32.id_vendor, &info->id_vendor) + || __get_user(i32.id_device, &info->id_device)) + return -EFAULT; + + if (copy_to_user(argp, &i32, sizeof(i32))) + return -EFAULT; + + return 0; +} + +typedef struct drm_agp_buffer32 { + u32 size; /**< In bytes -- will round to page boundary */ + u32 handle; /**< Used for binding / unbinding */ + u32 type; /**< Type of memory to allocate */ + u32 physical; /**< Physical used by i810 */ +} drm_agp_buffer32_t; + +static int compat_drm_agp_alloc(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_agp_buffer32_t __user *argp = (void __user *)arg; + drm_agp_buffer32_t req32; + drm_agp_buffer_t __user *request; + int err; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.size, &request->size) + || __put_user(req32.type, &request->type)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_AGP_ALLOC, (unsigned long) request); + if (err) + return err; + + if (__get_user(req32.handle, &request->handle) + || __get_user(req32.physical, &request->physical) + || copy_to_user(argp, &req32, sizeof(req32))) { + drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_AGP_FREE, (unsigned long) request); + return -EFAULT; + } + + return 0; +} + +static int compat_drm_agp_free(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_agp_buffer32_t __user *argp = (void __user *)arg; + drm_agp_buffer_t __user *request; + u32 handle; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || get_user(handle, &argp->handle) + || __put_user(handle, &request->handle)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_AGP_FREE, (unsigned long) request); +} + +typedef struct drm_agp_binding32 { + u32 handle; /**< From drm_agp_buffer */ + u32 offset; /**< In bytes -- will round to page boundary */ +} drm_agp_binding32_t; + +static int compat_drm_agp_bind(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_agp_binding32_t __user *argp = (void __user *)arg; + drm_agp_binding32_t req32; + drm_agp_binding_t __user *request; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.handle, &request->handle) + || __put_user(req32.offset, &request->offset)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_AGP_BIND, (unsigned long) request); +} + +static int compat_drm_agp_unbind(struct file *file, unsigned int cmd, + unsigned long arg) +{ + 
drm_agp_binding32_t __user *argp = (void __user *)arg; + drm_agp_binding_t __user *request; + u32 handle; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || get_user(handle, &argp->handle) + || __put_user(handle, &request->handle)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_AGP_UNBIND, (unsigned long) request); +} +#endif /* __OS_HAS_AGP */ + +typedef struct drm_scatter_gather32 { + u32 size; /**< In bytes -- will round to page boundary */ + u32 handle; /**< Used for mapping / unmapping */ +} drm_scatter_gather32_t; + +static int compat_drm_sg_alloc(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_scatter_gather32_t __user *argp = (void __user *)arg; + drm_scatter_gather_t __user *request; + int err; + unsigned long x; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || !access_ok(VERIFY_WRITE, argp, sizeof(*argp)) + || __get_user(x, &argp->size) + || __put_user(x, &request->size)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_SG_ALLOC, (unsigned long) request); + if (err) + return err; + + /* XXX not sure about the handle conversion here... */ + if (__get_user(x, &request->handle) + || __put_user(x >> PAGE_SHIFT, &argp->handle)) + return -EFAULT; + + return 0; +} + +static int compat_drm_sg_free(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_scatter_gather32_t __user *argp = (void __user *)arg; + drm_scatter_gather_t __user *request; + unsigned long x; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || !access_ok(VERIFY_WRITE, argp, sizeof(*argp)) + || __get_user(x, &argp->handle) + || __put_user(x << PAGE_SHIFT, &request->handle)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_SG_FREE, (unsigned long) request); +} + +struct drm_wait_vblank_request32 { + drm_vblank_seq_type_t type; + unsigned int sequence; + u32 signal; +}; + +struct drm_wait_vblank_reply32 { + drm_vblank_seq_type_t type; + unsigned int sequence; + s32 tval_sec; + s32 tval_usec; +}; + +typedef union drm_wait_vblank32 { + struct drm_wait_vblank_request32 request; + struct drm_wait_vblank_reply32 reply; +} drm_wait_vblank32_t; + +static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_wait_vblank32_t __user *argp = (void __user *)arg; + drm_wait_vblank32_t req32; + drm_wait_vblank_t __user *request; + int err; + + if (copy_from_user(&req32, argp, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.request.type, &request->request.type) + || __put_user(req32.request.sequence, &request->request.sequence) + || __put_user(req32.request.signal, &request->request.signal)) + return -EFAULT; + + err = drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_WAIT_VBLANK, (unsigned long) request); + if (err) + return err; + + if (__get_user(req32.reply.type, &request->reply.type) + || __get_user(req32.reply.sequence, &request->reply.sequence) + || __get_user(req32.reply.tval_sec, &request->reply.tval_sec) + || __get_user(req32.reply.tval_usec, &request->reply.tval_usec)) + return -EFAULT; + + if (copy_to_user(argp, &req32, sizeof(req32))) + return -EFAULT; + + return 0; +} + +drm_ioctl_compat_t *drm_compat_ioctls[] = { + 
[DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version, + [DRM_IOCTL_NR(DRM_IOCTL_GET_UNIQUE32)] = compat_drm_getunique, + [DRM_IOCTL_NR(DRM_IOCTL_GET_MAP32)] = compat_drm_getmap, + [DRM_IOCTL_NR(DRM_IOCTL_GET_CLIENT32)] = compat_drm_getclient, + [DRM_IOCTL_NR(DRM_IOCTL_GET_STATS32)] = compat_drm_getstats, + [DRM_IOCTL_NR(DRM_IOCTL_SET_UNIQUE32)] = compat_drm_setunique, + [DRM_IOCTL_NR(DRM_IOCTL_ADD_MAP32)] = compat_drm_addmap, + [DRM_IOCTL_NR(DRM_IOCTL_ADD_BUFS32)] = compat_drm_addbufs, + [DRM_IOCTL_NR(DRM_IOCTL_MARK_BUFS32)] = compat_drm_markbufs, + [DRM_IOCTL_NR(DRM_IOCTL_INFO_BUFS32)] = compat_drm_infobufs, + [DRM_IOCTL_NR(DRM_IOCTL_MAP_BUFS32)] = compat_drm_mapbufs, + [DRM_IOCTL_NR(DRM_IOCTL_FREE_BUFS32)] = compat_drm_freebufs, + [DRM_IOCTL_NR(DRM_IOCTL_RM_MAP32)] = compat_drm_rmmap, + [DRM_IOCTL_NR(DRM_IOCTL_SET_SAREA_CTX32)] = compat_drm_setsareactx, + [DRM_IOCTL_NR(DRM_IOCTL_GET_SAREA_CTX32)] = compat_drm_getsareactx, + [DRM_IOCTL_NR(DRM_IOCTL_RES_CTX32)] = compat_drm_resctx, + [DRM_IOCTL_NR(DRM_IOCTL_DMA32)] = compat_drm_dma, +#if __OS_HAS_AGP + [DRM_IOCTL_NR(DRM_IOCTL_AGP_ENABLE32)] = compat_drm_agp_enable, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_INFO32)] = compat_drm_agp_info, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_ALLOC32)] = compat_drm_agp_alloc, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_FREE32)] = compat_drm_agp_free, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_BIND32)] = compat_drm_agp_bind, + [DRM_IOCTL_NR(DRM_IOCTL_AGP_UNBIND32)] = compat_drm_agp_unbind, +#endif + [DRM_IOCTL_NR(DRM_IOCTL_SG_ALLOC32)] = compat_drm_sg_alloc, + [DRM_IOCTL_NR(DRM_IOCTL_SG_FREE32)] = compat_drm_sg_free, + [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank, +}; + +/** + * Called whenever a 32-bit process running under a 64-bit kernel + * performs an ioctl on /dev/drm. + * + * \param filp file pointer. + * \param cmd command. + * \param arg user argument. + * \return zero on success or negative number on failure. 
+ */ +long drm_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + unsigned int nr = DRM_IOCTL_NR(cmd); + drm_ioctl_compat_t *fn; + int ret; + + if (nr >= DRM_ARRAY_SIZE(drm_compat_ioctls)) + return -ENOTTY; + + fn = drm_compat_ioctls[nr]; + + lock_kernel(); /* XXX for now */ + if (fn != NULL) + ret = (*fn)(filp, cmd, arg); + else + ret = drm_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + unlock_kernel(); + + return ret; +} +EXPORT_SYMBOL(drm_compat_ioctl); diff --git a/drivers/char/drm/radeon_drv.c b/drivers/char/drm/radeon_drv.c index 7b983d96e53..18e4e5b0952 100644 --- a/drivers/char/drm/radeon_drv.c +++ b/drivers/char/drm/radeon_drv.c @@ -101,6 +101,9 @@ static struct drm_driver driver = { .mmap = drm_mmap, .poll = drm_poll, .fasync = drm_fasync, +#ifdef CONFIG_COMPAT + .compat_ioctl = radeon_compat_ioctl, +#endif }, .pci_driver = { .name = DRIVER_NAME, diff --git a/drivers/char/drm/radeon_drv.h b/drivers/char/drm/radeon_drv.h index 5837098afae..771aa80a5e8 100644 --- a/drivers/char/drm/radeon_drv.h +++ b/drivers/char/drm/radeon_drv.h @@ -317,6 +317,9 @@ extern int radeon_preinit( struct drm_device *dev, unsigned long flags ); extern int radeon_postinit( struct drm_device *dev, unsigned long flags ); extern int radeon_postcleanup( struct drm_device *dev ); +extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg); + /* Flags for stats.boxes */ #define RADEON_BOX_DMA_IDLE 0x1 diff --git a/drivers/char/drm/radeon_ioc32.c b/drivers/char/drm/radeon_ioc32.c new file mode 100644 index 00000000000..bfe612215fb --- /dev/null +++ b/drivers/char/drm/radeon_ioc32.c @@ -0,0 +1,395 @@ +/** + * \file radeon_ioc32.c + * + * 32-bit ioctl compatibility routines for the Radeon DRM. + * + * \author Paul Mackerras + * + * Copyright (C) Paul Mackerras 2005 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include +#include + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_drv.h" + +typedef struct drm_radeon_init32 { + int func; + u32 sarea_priv_offset; + int is_pci; + int cp_mode; + int gart_size; + int ring_size; + int usec_timeout; + + unsigned int fb_bpp; + unsigned int front_offset, front_pitch; + unsigned int back_offset, back_pitch; + unsigned int depth_bpp; + unsigned int depth_offset, depth_pitch; + + u32 fb_offset; + u32 mmio_offset; + u32 ring_offset; + u32 ring_rptr_offset; + u32 buffers_offset; + u32 gart_textures_offset; +} drm_radeon_init32_t; + +static int compat_radeon_cp_init(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_init32_t init32; + drm_radeon_init_t __user *init; + + if (copy_from_user(&init32, (void __user *)arg, sizeof(init32))) + return -EFAULT; + + init = compat_alloc_user_space(sizeof(*init)); + if (!access_ok(VERIFY_WRITE, init, sizeof(*init)) + || __put_user(init32.func, &init->func) + || __put_user(init32.sarea_priv_offset, &init->sarea_priv_offset) + || __put_user(init32.is_pci, &init->is_pci) + || __put_user(init32.cp_mode, &init->cp_mode) + || __put_user(init32.gart_size, &init->gart_size) + || __put_user(init32.ring_size, &init->ring_size) + || __put_user(init32.usec_timeout, &init->usec_timeout) + || __put_user(init32.fb_bpp, &init->fb_bpp) + || __put_user(init32.front_offset, &init->front_offset) + || __put_user(init32.front_pitch, &init->front_pitch) + || __put_user(init32.back_offset, &init->back_offset) + || __put_user(init32.back_pitch, &init->back_pitch) + || __put_user(init32.depth_bpp, &init->depth_bpp) + || __put_user(init32.depth_offset, &init->depth_offset) + || __put_user(init32.depth_pitch, &init->depth_pitch) + || __put_user(init32.fb_offset, &init->fb_offset) + || __put_user(init32.mmio_offset, &init->mmio_offset) + || __put_user(init32.ring_offset, &init->ring_offset) + || __put_user(init32.ring_rptr_offset, &init->ring_rptr_offset) + || __put_user(init32.buffers_offset, &init->buffers_offset) + || __put_user(init32.gart_textures_offset, + &init->gart_textures_offset)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_CP_INIT, (unsigned long) init); +} + +typedef struct drm_radeon_clear32 { + unsigned int flags; + unsigned int clear_color; + unsigned int clear_depth; + unsigned int color_mask; + unsigned int depth_mask; /* misnamed field: should be stencil */ + u32 depth_boxes; +} drm_radeon_clear32_t; + +static int compat_radeon_cp_clear(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_clear32_t clr32; + drm_radeon_clear_t __user *clr; + + if (copy_from_user(&clr32, (void __user *)arg, sizeof(clr32))) + return -EFAULT; + + clr = compat_alloc_user_space(sizeof(*clr)); + if (!access_ok(VERIFY_WRITE, clr, sizeof(*clr)) + || __put_user(clr32.flags, &clr->flags) + || __put_user(clr32.clear_color, &clr->clear_color) + || __put_user(clr32.clear_depth, &clr->clear_depth) + || __put_user(clr32.color_mask, &clr->color_mask) + || __put_user(clr32.depth_mask, &clr->depth_mask) + || __put_user((void __user *)(unsigned long)clr32.depth_boxes, + &clr->depth_boxes)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_CLEAR, (unsigned long) clr); +} + +typedef struct drm_radeon_stipple32 { + u32 mask; +} drm_radeon_stipple32_t; + +static int compat_radeon_cp_stipple(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_stipple32_t __user *argp = (void __user *) arg; + 
drm_radeon_stipple_t __user *request; + u32 mask; + + if (get_user(mask, &argp->mask)) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user((unsigned int __user *)(unsigned long) mask, + &request->mask)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_STIPPLE, (unsigned long) request); +} + +typedef struct drm_radeon_tex_image32 { + unsigned int x, y; /* Blit coordinates */ + unsigned int width, height; + u32 data; +} drm_radeon_tex_image32_t; + +typedef struct drm_radeon_texture32 { + unsigned int offset; + int pitch; + int format; + int width; /* Texture image coordinates */ + int height; + u32 image; +} drm_radeon_texture32_t; + +static int compat_radeon_cp_texture(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_texture32_t req32; + drm_radeon_texture_t __user *request; + drm_radeon_tex_image32_t img32; + drm_radeon_tex_image_t __user *image; + + if (copy_from_user(&req32, (void __user *) arg, sizeof(req32))) + return -EFAULT; + if (req32.image == 0) + return -EINVAL; + if (copy_from_user(&img32, (void __user *)(unsigned long)req32.image, + sizeof(img32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request) + sizeof(*image)); + if (!access_ok(VERIFY_WRITE, request, + sizeof(*request) + sizeof(*image))) + return -EFAULT; + image = (drm_radeon_tex_image_t __user *) (request + 1); + + if (__put_user(req32.offset, &request->offset) + || __put_user(req32.pitch, &request->pitch) + || __put_user(req32.format, &request->format) + || __put_user(req32.width, &request->width) + || __put_user(req32.height, &request->height) + || __put_user(image, &request->image) + || __put_user(img32.x, &image->x) + || __put_user(img32.y, &image->y) + || __put_user(img32.width, &image->width) + || __put_user(img32.height, &image->height) + || __put_user((const void __user *)(unsigned long)img32.data, + &image->data)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_TEXTURE, (unsigned long) request); +} + +typedef struct drm_radeon_vertex2_32 { + int idx; /* Index of vertex buffer */ + int discard; /* Client finished with buffer? 
*/ + int nr_states; + u32 state; + int nr_prims; + u32 prim; +} drm_radeon_vertex2_32_t; + +static int compat_radeon_cp_vertex2(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_vertex2_32_t req32; + drm_radeon_vertex2_t __user *request; + + if (copy_from_user(&req32, (void __user *) arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.idx, &request->idx) + || __put_user(req32.discard, &request->discard) + || __put_user(req32.nr_states, &request->nr_states) + || __put_user((void __user *)(unsigned long)req32.state, + &request->state) + || __put_user(req32.nr_prims, &request->nr_prims) + || __put_user((void __user *)(unsigned long)req32.prim, + &request->prim)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_VERTEX2, (unsigned long) request); +} + +typedef struct drm_radeon_cmd_buffer32 { + int bufsz; + u32 buf; + int nbox; + u32 boxes; +} drm_radeon_cmd_buffer32_t; + +static int compat_radeon_cp_cmdbuf(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_cmd_buffer32_t req32; + drm_radeon_cmd_buffer_t __user *request; + + if (copy_from_user(&req32, (void __user *) arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.bufsz, &request->bufsz) + || __put_user((void __user *)(unsigned long)req32.buf, + &request->buf) + || __put_user(req32.nbox, &request->nbox) + || __put_user((void __user *)(unsigned long)req32.boxes, + &request->boxes)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_CMDBUF, (unsigned long) request); +} + +typedef struct drm_radeon_getparam32 { + int param; + u32 value; +} drm_radeon_getparam32_t; + +static int compat_radeon_cp_getparam(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_getparam32_t req32; + drm_radeon_getparam_t __user *request; + + if (copy_from_user(&req32, (void __user *) arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.param, &request->param) + || __put_user((void __user *)(unsigned long)req32.value, + &request->value)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_GETPARAM, (unsigned long) request); +} + +typedef struct drm_radeon_mem_alloc32 { + int region; + int alignment; + int size; + u32 region_offset; /* offset from start of fb or GART */ +} drm_radeon_mem_alloc32_t; + +static int compat_radeon_mem_alloc(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_mem_alloc32_t req32; + drm_radeon_mem_alloc_t __user *request; + + if (copy_from_user(&req32, (void __user *) arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user(req32.region, &request->region) + || __put_user(req32.alignment, &request->alignment) + || __put_user(req32.size, &request->size) + || __put_user((int __user *)(unsigned long)req32.region_offset, + &request->region_offset)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_ALLOC, (unsigned long) request); +} + +typedef struct drm_radeon_irq_emit32 { + u32 irq_seq; +} drm_radeon_irq_emit32_t; + +static int 
compat_radeon_irq_emit(struct file *file, unsigned int cmd, + unsigned long arg) +{ + drm_radeon_irq_emit32_t req32; + drm_radeon_irq_emit_t __user *request; + + if (copy_from_user(&req32, (void __user *) arg, sizeof(req32))) + return -EFAULT; + + request = compat_alloc_user_space(sizeof(*request)); + if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) + || __put_user((int __user *)(unsigned long)req32.irq_seq, + &request->irq_seq)) + return -EFAULT; + + return drm_ioctl(file->f_dentry->d_inode, file, + DRM_IOCTL_RADEON_IRQ_EMIT, (unsigned long) request); +} + +drm_ioctl_compat_t *radeon_compat_ioctls[] = { + [DRM_RADEON_CP_INIT] = compat_radeon_cp_init, + [DRM_RADEON_CLEAR] = compat_radeon_cp_clear, + [DRM_RADEON_STIPPLE] = compat_radeon_cp_stipple, + [DRM_RADEON_TEXTURE] = compat_radeon_cp_texture, + [DRM_RADEON_VERTEX2] = compat_radeon_cp_vertex2, + [DRM_RADEON_CMDBUF] = compat_radeon_cp_cmdbuf, + [DRM_RADEON_GETPARAM] = compat_radeon_cp_getparam, + [DRM_RADEON_ALLOC] = compat_radeon_mem_alloc, + [DRM_RADEON_IRQ_EMIT] = compat_radeon_irq_emit, +}; + +/** + * Called whenever a 32-bit process running under a 64-bit kernel + * performs an ioctl on /dev/dri/card. + * + * \param filp file pointer. + * \param cmd command. + * \param arg user argument. + * \return zero on success or negative number on failure. + */ +long radeon_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + unsigned int nr = DRM_IOCTL_NR(cmd); + drm_ioctl_compat_t *fn = NULL; + int ret; + + if (nr < DRM_COMMAND_BASE) + return drm_compat_ioctl(filp, cmd, arg); + + if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(radeon_compat_ioctls)) + fn = radeon_compat_ioctls[nr - DRM_COMMAND_BASE]; + + lock_kernel(); /* XXX for now */ + if (fn != NULL) + ret = (*fn)(filp, cmd, arg); + else + ret = drm_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + unlock_kernel(); + + return ret; +} -- cgit v1.2.3 From cfd9e15f78fc6efe88ea8cb0722a731b331cfd80 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 23 Jun 2005 22:43:00 +1000 Subject: Currently DRM depends on PCI this will need to change for ffb on Sparc to be fixed but at the moment it is true. Signed-off-by: Geert Uytterhoeven Signed-off-by: Dave Airlie --- drivers/char/drm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/drm/Kconfig b/drivers/char/drm/Kconfig index d9a02993467..c2b12eab67c 100644 --- a/drivers/char/drm/Kconfig +++ b/drivers/char/drm/Kconfig @@ -6,7 +6,7 @@ # config DRM tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)" - depends on AGP || AGP=n + depends on (AGP || AGP=n) && PCI help Kernel-level support for the Direct Rendering Infrastructure (DRI) introduced in XFree86 4.0. If you say Y here, you need to select -- cgit v1.2.3 From bc54fd1ad3c5972be339a08528ab631326ed2b38 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 23 Jun 2005 22:46:46 +1000 Subject: Add missing license texts from Tungsten Graphics. 
From: Alan Hourihane Signed-off-by: David Airlie --- drivers/char/drm/i915_dma.c | 24 ++++++++++++++++++++++-- drivers/char/drm/i915_drm.h | 27 +++++++++++++++++++++++++++ drivers/char/drm/i915_drv.c | 25 ++++++++++++++++++++++--- drivers/char/drm/i915_drv.h | 24 ++++++++++++++++++++++-- drivers/char/drm/i915_irq.c | 24 ++++++++++++++++++++++-- drivers/char/drm/i915_mem.c | 24 ++++++++++++++++++++++-- 6 files changed, 137 insertions(+), 11 deletions(-) diff --git a/drivers/char/drm/i915_dma.c b/drivers/char/drm/i915_dma.c index 7300a09dbd5..b5903f9f142 100644 --- a/drivers/char/drm/i915_dma.c +++ b/drivers/char/drm/i915_dma.c @@ -1,10 +1,30 @@ /* i915_dma.c -- DMA support for the I915 -*- linux-c -*- */ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * **************************************************************************/ #include "drmP.h" diff --git a/drivers/char/drm/i915_drm.h b/drivers/char/drm/i915_drm.h index 7e55edf45c4..23e027d2908 100644 --- a/drivers/char/drm/i915_drm.h +++ b/drivers/char/drm/i915_drm.h @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef _I915_DRM_H_ #define _I915_DRM_H_ diff --git a/drivers/char/drm/i915_drv.c b/drivers/char/drm/i915_drv.c index 002b7082e21..e6a9e1d1d28 100644 --- a/drivers/char/drm/i915_drv.c +++ b/drivers/char/drm/i915_drv.c @@ -1,11 +1,30 @@ /* i915_drv.c -- i830,i845,i855,i865,i915 driver -*- linux-c -*- */ - /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * **************************************************************************/ #include "drmP.h" diff --git a/drivers/char/drm/i915_drv.h b/drivers/char/drm/i915_drv.h index f6ca92a565d..fa940d64b85 100644 --- a/drivers/char/drm/i915_drv.h +++ b/drivers/char/drm/i915_drv.h @@ -1,10 +1,30 @@ /* i915_drv.h -- Private header for the I915 driver -*- linux-c -*- */ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * **************************************************************************/ #ifndef _I915_DRV_H_ diff --git a/drivers/char/drm/i915_irq.c b/drivers/char/drm/i915_irq.c index b0239262a84..a101cc9cfd7 100644 --- a/drivers/char/drm/i915_irq.c +++ b/drivers/char/drm/i915_irq.c @@ -1,10 +1,30 @@ /* i915_dma.c -- DMA support for the I915 -*- linux-c -*- */ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * **************************************************************************/ #include "drmP.h" diff --git a/drivers/char/drm/i915_mem.c b/drivers/char/drm/i915_mem.c index d54a3005946..9b1698f521b 100644 --- a/drivers/char/drm/i915_mem.c +++ b/drivers/char/drm/i915_mem.c @@ -1,10 +1,30 @@ /* i915_mem.c -- Simple agp/fb memory manager for i915 -*- linux-c -*- */ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * **************************************************************************/ #include "drmP.h" -- cgit v1.2.3 From f7704347a74fceaf79c89f8b8dbdd0111013e4d6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2005 17:39:03 -0700 Subject: [PKT_SCHED]: Make TEXTSEARCH* options only selected. Do not present these confusing new options to the user unless he picked some facility that makes use of it, such as NET_EMATCH_TEXT. Signed-off-by: David S. Miller --- lib/Kconfig | 28 ++++++---------------------- net/sched/Kconfig | 5 +++-- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index 455833a9e31..eeb429a5215 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -63,32 +63,16 @@ config REED_SOLOMON_ENC16 config REED_SOLOMON_DEC16 boolean +# +# Textsearch support is select'ed if needed +# config TEXTSEARCH - boolean "Textsearch infrastructure" - default y - help - Say Y here if you want to provide a textsearch infrastructure - to other subsystems. + boolean config TEXTSEARCH_KMP - depends on TEXTSEARCH - tristate "Knuth-Morris-Pratt" - help - Say Y here if you want to be able to search text using the - Knuth-Morris-Pratt textsearch algorithm. - - To compile this code as a module, choose M here: the - module will be called ts_kmp. + tristate config TEXTSEARCH_FSM - depends on TEXTSEARCH - tristate "Finite state machine" - help - Say Y here if you want to be able to search text using a - naive finite state machine approach implementing a subset - of regular expressions. - - To compile this code as a module, choose M here: the - module will be called ts_fsm. + tristate endmenu diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 447b89e556b..7bac249258e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -453,10 +453,11 @@ config NET_EMATCH_TEXT tristate "Textsearch" depends on NET_EMATCH select TEXTSEARCH + select TEXTSEARCH_KMP + select TEXTSEARCH_FSM ---help--- Say Y here if you want to be ablt to classify packets based on - textsearch comparisons. Please select the appropriate textsearch - algorithms in the Library section. + textsearch comparisons. To compile this code as a module, choose M here: the module will be called em_text. -- cgit v1.2.3 From bb298ca3ce92574d57c4e49b329421425ea7d279 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2005 17:50:53 -0700 Subject: [IPV4]: Move FIB lookup algorithm choice under IP_ADVANCED_ROUTING Most users need not be concerned with a complex choice of what FIB lookup algorithm to use. So give them the safe default of IP_FIB_HASH if IP_ADVANCED_ROUTING is disabled. Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 64 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 690e88ba248..7bcfb84126d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -1,32 +1,6 @@ # # IP configuration # -choice - prompt "Choose IP: FIB lookup" - depends on INET - default IP_FIB_HASH - -config IP_FIB_HASH - bool "FIB_HASH" - ---help--- - Current FIB is very proven and good enough for most users. - -config IP_FIB_TRIE - bool "FIB_TRIE" - ---help--- - Use new experimental LC-trie as FIB lookup algoritm. 
- This improves lookup performance - - LC-trie is described in: - - IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson - IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 - An experimental study of compression methods for dynamic tries - Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. - http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ - -endchoice - config IP_MULTICAST bool "IP: multicasting" depends on INET @@ -79,6 +53,44 @@ config IP_ADVANCED_ROUTER If unsure, say N here. +choice + prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" + depends on IP_ADVANCED_ROUTER + default IP_FIB_HASH + +config IP_FIB_HASH + bool "FIB_HASH" + ---help--- + Current FIB is very proven and good enough for most users. + +config IP_FIB_TRIE + bool "FIB_TRIE" + ---help--- + Use new experimental LC-trie as FIB lookup algoritm. + This improves lookup performance if you have a large + number of routes. + + LC-trie is a longest matching prefix lookup algorithm which + performs better than FIB_HASH for large routing tables. + But, it consumes more memory and is more complex. + + LC-trie is described in: + + IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson + IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 + An experimental study of compression methods for dynamic tries + Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. + http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ + +endchoice + +# If the user does not enable advanced routing, he gets the safe +# default of the fib-hash algorithm. +config IP_FIB_HASH + bool + depends on !IP_ADVANCED_ROUTER + default y + config IP_MULTIPLE_TABLES bool "IP: policy routing" depends on IP_ADVANCED_ROUTER -- cgit v1.2.3 From a6484045fdd4154f8c8ee8c1dda4e32854c047e0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2005 18:07:51 -0700 Subject: [TCP]: Do not present confusing congestion control options by default. Create TCP_CONG_ADVANCED option, akin to IP_ADVANCED_ROUTER, which when disabled will bypass all of the congestion control Kconfig options and leave the user with a safe default. That safe default is currently BIC-TCP with new Reno as a fallback. Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7bcfb84126d..34708343312 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -445,9 +445,22 @@ config IP_TCPDIAG config IP_TCPDIAG_IPV6 def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6) +config TCP_CONG_ADVANCED + bool "TCP: advanced congestion control" + depends on INET + default y + ---help--- + Support for selection of various TCP congestion control + modules. + + Nearly all users can safely say no here, and a safe default + selection will be made (BIC-TCP with new Reno as a fallback). + + If unsure, say N. + # TCP Reno is builtin (required as fallback) menu "TCP congestion control" - depends on INET + depends on TCP_CONG_ADVANCED config TCP_CONG_BIC tristate "Binary Increase Congestion (BIC) control" @@ -535,5 +548,10 @@ config TCP_CONG_SCALABLE endmenu +config TCP_CONG_BIC + boolean + depends on !TCP_CONG_ADVANCED + default y + source "net/ipv4/ipvs/Kconfig" -- cgit v1.2.3 From c54d7e03c3a21b38c587f671704c5a12aa3987fc Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Fri, 24 Jun 2005 19:57:07 -0700 Subject: [SUNRPC]: Fix {s,}size_t printf format strings in xprt.c Signed-off-by: David S. Miller --- net/sunrpc/xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index eca92405948..269f217918a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -970,7 +970,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) goto out; } - dprintk("RPC: XID %08x read %u bytes\n", + dprintk("RPC: XID %08x read %Zd bytes\n", ntohl(xprt->tcp_xid), r); dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); @@ -1006,7 +1006,7 @@ tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) desc->count -= len; desc->offset += len; xprt->tcp_offset += len; - dprintk("RPC: discarded %u bytes\n", len); + dprintk("RPC: discarded %Zu bytes\n", len); tcp_check_recm(xprt); } -- cgit v1.2.3 From 37616578539a47d9ace5e907ae73ea93a8cde740 Mon Sep 17 00:00:00 2001 From: William Lee Irwin III Date: Fri, 24 Jun 2005 20:06:18 -0700 Subject: [SPARC]: sunzilog warning fixes From: William Lee Irwin III This small patch silences some iomem-related warnings in sunzilog.c by declaring mapped_addr as void __iomem * and inserting a cast in one case. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/serial/sunzilog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index 5c4231ae295..8e65206d3d7 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -1071,7 +1071,7 @@ static void __init sunzilog_alloc_tables(void) */ static struct zilog_layout __iomem * __init get_zs_sun4u(int chip, int zsnode) { - unsigned long mapped_addr; + void __iomem *mapped_addr; unsigned int sun4u_ino; struct sbus_bus *sbus = NULL; struct sbus_dev *sdev = NULL; @@ -1111,9 +1111,9 @@ static struct zilog_layout __iomem * __init get_zs_sun4u(int chip, int zsnode) apply_fhc_ranges(central_bus->child, &zsregs[0], 1); apply_central_ranges(central_bus, &zsregs[0], 1); - mapped_addr = - (((u64)zsregs[0].which_io)<<32UL) | - ((u64)zsregs[0].phys_addr); + mapped_addr = (void __iomem *) + ((((u64)zsregs[0].which_io)<<32UL) | + ((u64)zsregs[0].phys_addr)); } if (zilog_irq == -1) { -- cgit v1.2.3 From e55c57e0b51c68d78845549505057169c6c3cba6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2005 20:11:03 -0700 Subject: [SPARC64]: Report any user access faults in termios accessors. Signed-off-by: David S. 
Miller --- include/asm-sparc64/termios.h | 78 +++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/include/asm-sparc64/termios.h b/include/asm-sparc64/termios.h index 8effce0da08..9777a9cca88 100644 --- a/include/asm-sparc64/termios.h +++ b/include/asm-sparc64/termios.h @@ -100,16 +100,17 @@ struct winsize { #define user_termio_to_kernel_termios(termios, termio) \ ({ \ unsigned short tmp; \ - get_user(tmp, &(termio)->c_iflag); \ + int err; \ + err = get_user(tmp, &(termio)->c_iflag); \ (termios)->c_iflag = (0xffff0000 & ((termios)->c_iflag)) | tmp; \ - get_user(tmp, &(termio)->c_oflag); \ + err |= get_user(tmp, &(termio)->c_oflag); \ (termios)->c_oflag = (0xffff0000 & ((termios)->c_oflag)) | tmp; \ - get_user(tmp, &(termio)->c_cflag); \ + err |= get_user(tmp, &(termio)->c_cflag); \ (termios)->c_cflag = (0xffff0000 & ((termios)->c_cflag)) | tmp; \ - get_user(tmp, &(termio)->c_lflag); \ + err |= get_user(tmp, &(termio)->c_lflag); \ (termios)->c_lflag = (0xffff0000 & ((termios)->c_lflag)) | tmp; \ - copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ - 0; \ + err |= copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ + err; \ }) /* @@ -119,53 +120,56 @@ struct winsize { */ #define kernel_termios_to_user_termio(termio, termios) \ ({ \ - put_user((termios)->c_iflag, &(termio)->c_iflag); \ - put_user((termios)->c_oflag, &(termio)->c_oflag); \ - put_user((termios)->c_cflag, &(termio)->c_cflag); \ - put_user((termios)->c_lflag, &(termio)->c_lflag); \ - put_user((termios)->c_line, &(termio)->c_line); \ - copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ + int err; \ + err = put_user((termios)->c_iflag, &(termio)->c_iflag); \ + err |= put_user((termios)->c_oflag, &(termio)->c_oflag); \ + err |= put_user((termios)->c_cflag, &(termio)->c_cflag); \ + err |= put_user((termios)->c_lflag, &(termio)->c_lflag); \ + err |= put_user((termios)->c_line, &(termio)->c_line); \ + err |= copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ if (!((termios)->c_lflag & ICANON)) { \ - put_user((termios)->c_cc[VMIN], &(termio)->c_cc[_VMIN]); \ - put_user((termios)->c_cc[VTIME], &(termio)->c_cc[_VTIME]); \ + err |= put_user((termios)->c_cc[VMIN], &(termio)->c_cc[_VMIN]); \ + err |= put_user((termios)->c_cc[VTIME], &(termio)->c_cc[_VTIME]); \ } \ - 0; \ + err; \ }) #define user_termios_to_kernel_termios(k, u) \ ({ \ - get_user((k)->c_iflag, &(u)->c_iflag); \ - get_user((k)->c_oflag, &(u)->c_oflag); \ - get_user((k)->c_cflag, &(u)->c_cflag); \ - get_user((k)->c_lflag, &(u)->c_lflag); \ - get_user((k)->c_line, &(u)->c_line); \ - copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \ + int err; \ + err = get_user((k)->c_iflag, &(u)->c_iflag); \ + err |= get_user((k)->c_oflag, &(u)->c_oflag); \ + err |= get_user((k)->c_cflag, &(u)->c_cflag); \ + err |= get_user((k)->c_lflag, &(u)->c_lflag); \ + err |= get_user((k)->c_line, &(u)->c_line); \ + err |= copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \ if((k)->c_lflag & ICANON) { \ - get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ - get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ + err |= get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ + err |= get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ } else { \ - get_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ - get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ + err |= get_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ + err |= get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ } \ - 0; \ + err; \ }) #define kernel_termios_to_user_termios(u, k) \ ({ \ - put_user((k)->c_iflag, &(u)->c_iflag); \ 
- put_user((k)->c_oflag, &(u)->c_oflag); \ - put_user((k)->c_cflag, &(u)->c_cflag); \ - put_user((k)->c_lflag, &(u)->c_lflag); \ - put_user((k)->c_line, &(u)->c_line); \ - copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \ + int err; \ + err = put_user((k)->c_iflag, &(u)->c_iflag); \ + err |= put_user((k)->c_oflag, &(u)->c_oflag); \ + err |= put_user((k)->c_cflag, &(u)->c_cflag); \ + err |= put_user((k)->c_lflag, &(u)->c_lflag); \ + err |= put_user((k)->c_line, &(u)->c_line); \ + err |= copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \ if(!((k)->c_lflag & ICANON)) { \ - put_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ - put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ + err |= put_user((k)->c_cc[VMIN], &(u)->c_cc[_VMIN]); \ + err |= put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \ } else { \ - put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ - put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ + err |= put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \ + err |= put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \ } \ - 0; \ + err; \ }) #endif /* __KERNEL__ */ -- cgit v1.2.3 From cd024c8baf9756759c57f0a19be639da8d3d4f8c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2005 20:17:10 -0700 Subject: [TG3]: Fix missing memory barriers and SD_STATUS_UPDATED bit clearing. There must be a rmb() between reading the status block tag and calling tg3_has_work(). This was missing in tg3_mis() and tg3_interrupt_tagged(). tg3_poll() got it right. Also, SD_STATUS_UPDATED must be cleared in the status block right before we call tg3_has_work(). Only tg3_poll() got this wrong. Based upon patches and commentary from Grant Grundler and Michael Chan. Signed-off-by: David S. Miller --- drivers/net/tg3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index a0b8848049c..fef1d087107 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -2929,6 +2929,7 @@ static int tg3_poll(struct net_device *netdev, int *budget) if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) tp->last_tag = sblk->status_tag; rmb(); + sblk->status &= ~SD_STATUS_UPDATED; /* if no more work, tell net stack and NIC we're done */ done = !tg3_has_work(tp); @@ -2964,6 +2965,7 @@ static irqreturn_t tg3_msi(int irq, void *dev_id, struct pt_regs *regs) */ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); tp->last_tag = sblk->status_tag; + rmb(); sblk->status &= ~SD_STATUS_UPDATED; if (likely(tg3_has_work(tp))) netif_rx_schedule(dev); /* schedule NAPI poll */ @@ -3051,6 +3053,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *r tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); tp->last_tag = sblk->status_tag; + rmb(); sblk->status &= ~SD_STATUS_UPDATED; if (likely(tg3_has_work(tp))) netif_rx_schedule(dev); /* schedule NAPI poll */ -- cgit v1.2.3 From f47c11eeccc8820010992eb32dbe7370a08f8bd3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2005 20:18:35 -0700 Subject: [TG3]: Eliminate all hw IRQ handler spinlocks. Move all driver spinlocks to be taken at sw IRQ context only. This fixes the skb_copy() we were doing with hw IRQs disabled (which is illegal and triggers a BUG() with HIGHMEM enabled). It also simplifies the locking all over the driver tremendously. We accomplish this feat by creating a special sequence to synchronize with the hw IRQ handler using a binary state and synchronize_irq(). This idea is from Herbert Xu. Thanks to Michael Chan for helping to track down all of the race conditions in initial versions of this code. 
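For readers who want the quiescence handshake in isolation, here is an illustrative sketch only, not part of the patch itself. The irq_sync flag, the smp_mb()/synchronize_irq() pairing and the full-lock helpers mirror what the diff below adds to tg3.c; the struct layout, the helper names (nic_softc, quiesce_irq, full_lock, full_unlock, nic_isr) and the trimmed ISR body are simplified placeholders, not the driver's actual code.

#include <linux/spinlock.h>
#include <linux/interrupt.h>

struct nic_softc {			/* illustrative stand-in for struct tg3 */
	spinlock_t lock;		/* general driver lock, taken with BHs off */
	spinlock_t tx_lock;		/* TX path lock */
	unsigned int irq_sync;		/* non-zero: IRQ handler must back off */
	unsigned int irq;		/* device interrupt line */
};

static void quiesce_irq(struct nic_softc *tp)
{
	tp->irq_sync = 1;		/* ask the handler to stop doing work */
	smp_mb();			/* make the flag visible before waiting */
	synchronize_irq(tp->irq);	/* wait out handlers already in flight */
}

/* Full driver exclusion; pass irq_sync=1 to also shut out the hw IRQ handler. */
static void full_lock(struct nic_softc *tp, int irq_sync)
{
	if (irq_sync)
		quiesce_irq(tp);
	spin_lock_bh(&tp->lock);
	spin_lock(&tp->tx_lock);
}

static void full_unlock(struct nic_softc *tp)
{
	spin_unlock(&tp->tx_lock);
	spin_unlock_bh(&tp->lock);
}

/* Hardware IRQ handler: never takes driver locks, bails out once quiesced. */
static irqreturn_t nic_isr(int irq, void *dev_id, struct pt_regs *regs)
{
	struct nic_softc *tp = dev_id;

	if (tp->irq_sync)
		return IRQ_HANDLED;	/* quiesce in progress: do nothing */
	/* ... ack the chip and schedule NAPI polling here ... */
	return IRQ_HANDLED;
}

Because the handler never acquires lock or tx_lock, every other path can take them with only BHs disabled; a path that must also exclude the handler (reset, shutdown, suspend) sets irq_sync and waits in synchronize_irq() before taking the locks, which is what tg3_full_lock(tp, 1) does in the patch below.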
Signed-off-by: David S. Miller --- drivers/net/tg3.c | 304 +++++++++++++++++++++++------------------------------- drivers/net/tg3.h | 24 ++++- 2 files changed, 149 insertions(+), 179 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index fef1d087107..8b8aa2ad578 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -337,12 +337,10 @@ static struct { static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val) { if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) { - unsigned long flags; - - spin_lock_irqsave(&tp->indirect_lock, flags); + spin_lock_bh(&tp->indirect_lock); pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); - spin_unlock_irqrestore(&tp->indirect_lock, flags); + spin_unlock_bh(&tp->indirect_lock); } else { writel(val, tp->regs + off); if ((tp->tg3_flags & TG3_FLAG_5701_REG_WRITE_BUG) != 0) @@ -353,12 +351,10 @@ static void tg3_write_indirect_reg32(struct tg3 *tp, u32 off, u32 val) static void _tw32_flush(struct tg3 *tp, u32 off, u32 val) { if ((tp->tg3_flags & TG3_FLAG_PCIX_TARGET_HWBUG) != 0) { - unsigned long flags; - - spin_lock_irqsave(&tp->indirect_lock, flags); + spin_lock_bh(&tp->indirect_lock); pci_write_config_dword(tp->pdev, TG3PCI_REG_BASE_ADDR, off); pci_write_config_dword(tp->pdev, TG3PCI_REG_DATA, val); - spin_unlock_irqrestore(&tp->indirect_lock, flags); + spin_unlock_bh(&tp->indirect_lock); } else { void __iomem *dest = tp->regs + off; writel(val, dest); @@ -398,28 +394,24 @@ static inline void _tw32_tx_mbox(struct tg3 *tp, u32 off, u32 val) static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val) { - unsigned long flags; - - spin_lock_irqsave(&tp->indirect_lock, flags); + spin_lock_bh(&tp->indirect_lock); pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); /* Always leave this as zero. */ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); - spin_unlock_irqrestore(&tp->indirect_lock, flags); + spin_unlock_bh(&tp->indirect_lock); } static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val) { - unsigned long flags; - - spin_lock_irqsave(&tp->indirect_lock, flags); + spin_lock_bh(&tp->indirect_lock); pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off); pci_read_config_dword(tp->pdev, TG3PCI_MEM_WIN_DATA, val); /* Always leave this as zero. 
*/ pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, 0); - spin_unlock_irqrestore(&tp->indirect_lock, flags); + spin_unlock_bh(&tp->indirect_lock); } static void tg3_disable_ints(struct tg3 *tp) @@ -443,7 +435,7 @@ static void tg3_enable_ints(struct tg3 *tp) tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, (tp->last_tag << 24)); tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); - + tp->irq_sync = 0; tg3_cond_int(tp); } @@ -504,7 +496,8 @@ static inline void tg3_netif_start(struct tg3 *tp) * (such as after tg3_init_hw) */ netif_poll_enable(tp->dev); - tg3_cond_int(tp); + tp->hw_status->status |= SD_STATUS_UPDATED; + tg3_enable_ints(tp); } static void tg3_switch_clocks(struct tg3 *tp) @@ -2578,7 +2571,7 @@ static void tg3_tx(struct tg3 *tp) sw_idx = NEXT_TX(sw_idx); } - dev_kfree_skb_irq(skb); + dev_kfree_skb(skb); } tp->tx_cons = sw_idx; @@ -2884,11 +2877,8 @@ static int tg3_poll(struct net_device *netdev, int *budget) { struct tg3 *tp = netdev_priv(netdev); struct tg3_hw_status *sblk = tp->hw_status; - unsigned long flags; int done; - spin_lock_irqsave(&tp->lock, flags); - /* handle link change and other phy events */ if (!(tp->tg3_flags & (TG3_FLAG_USE_LINKCHG_REG | @@ -2896,7 +2886,9 @@ static int tg3_poll(struct net_device *netdev, int *budget) if (sblk->status & SD_STATUS_LINK_CHG) { sblk->status = SD_STATUS_UPDATED | (sblk->status & ~SD_STATUS_LINK_CHG); + spin_lock(&tp->lock); tg3_setup_phy(tp, 0); + spin_unlock(&tp->lock); } } @@ -2907,8 +2899,6 @@ static int tg3_poll(struct net_device *netdev, int *budget) spin_unlock(&tp->tx_lock); } - spin_unlock_irqrestore(&tp->lock, flags); - /* run RX thread, within the bounds set by NAPI. * All RX "locking" is done by ensuring outside * code synchronizes with dev->poll() @@ -2934,15 +2924,49 @@ static int tg3_poll(struct net_device *netdev, int *budget) /* if no more work, tell net stack and NIC we're done */ done = !tg3_has_work(tp); if (done) { - spin_lock_irqsave(&tp->lock, flags); - __netif_rx_complete(netdev); + spin_lock(&tp->lock); + netif_rx_complete(netdev); tg3_restart_ints(tp); - spin_unlock_irqrestore(&tp->lock, flags); + spin_unlock(&tp->lock); } return (done ? 0 : 1); } +static void tg3_irq_quiesce(struct tg3 *tp) +{ + BUG_ON(tp->irq_sync); + + tp->irq_sync = 1; + smp_mb(); + + synchronize_irq(tp->pdev->irq); +} + +static inline int tg3_irq_sync(struct tg3 *tp) +{ + return tp->irq_sync; +} + +/* Fully shutdown all tg3 driver activity elsewhere in the system. + * If irq_sync is non-zero, then the IRQ handler must be synchronized + * with as well. Most of the time, this is not necessary except when + * shutting down the device. + */ +static inline void tg3_full_lock(struct tg3 *tp, int irq_sync) +{ + if (irq_sync) + tg3_irq_quiesce(tp); + spin_lock_bh(&tp->lock); + spin_lock(&tp->tx_lock); +} + +static inline void tg3_full_unlock(struct tg3 *tp) +{ + spin_unlock(&tp->tx_lock); + spin_unlock_bh(&tp->lock); +} + /* MSI ISR - No need to check for interrupt sharing and no need to * flush status block and interrupt mailbox. PCI ordering rules * guarantee that MSI will arrive after the status block. 
@@ -2952,9 +2976,6 @@ static irqreturn_t tg3_msi(int irq, void *dev_id, struct pt_regs *regs) struct net_device *dev = dev_id; struct tg3 *tp = netdev_priv(dev); struct tg3_hw_status *sblk = tp->hw_status; - unsigned long flags; - - spin_lock_irqsave(&tp->lock, flags); /* * Writing any value to intr-mbox-0 clears PCI INTA# and @@ -2966,6 +2987,8 @@ static irqreturn_t tg3_msi(int irq, void *dev_id, struct pt_regs *regs) tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); tp->last_tag = sblk->status_tag; rmb(); + if (tg3_irq_sync(tp)) + goto out; sblk->status &= ~SD_STATUS_UPDATED; if (likely(tg3_has_work(tp))) netif_rx_schedule(dev); /* schedule NAPI poll */ @@ -2974,9 +2997,7 @@ static irqreturn_t tg3_msi(int irq, void *dev_id, struct pt_regs *regs) tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, tp->last_tag << 24); } - - spin_unlock_irqrestore(&tp->lock, flags); - +out: return IRQ_RETVAL(1); } @@ -2985,11 +3006,8 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) struct net_device *dev = dev_id; struct tg3 *tp = netdev_priv(dev); struct tg3_hw_status *sblk = tp->hw_status; - unsigned long flags; unsigned int handled = 1; - spin_lock_irqsave(&tp->lock, flags); - /* In INTx mode, it is possible for the interrupt to arrive at * the CPU before the status block posted prior to the interrupt. * Reading the PCI State register will confirm whether the @@ -3006,6 +3024,8 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); + if (tg3_irq_sync(tp)) + goto out; sblk->status &= ~SD_STATUS_UPDATED; if (likely(tg3_has_work(tp))) netif_rx_schedule(dev); /* schedule NAPI poll */ @@ -3020,9 +3040,7 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) } else { /* shared interrupt */ handled = 0; } - - spin_unlock_irqrestore(&tp->lock, flags); - +out: return IRQ_RETVAL(handled); } @@ -3031,11 +3049,8 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *r struct net_device *dev = dev_id; struct tg3 *tp = netdev_priv(dev); struct tg3_hw_status *sblk = tp->hw_status; - unsigned long flags; unsigned int handled = 1; - spin_lock_irqsave(&tp->lock, flags); - /* In INTx mode, it is possible for the interrupt to arrive at * the CPU before the status block posted prior to the interrupt. 
* Reading the PCI State register will confirm whether the @@ -3054,6 +3069,8 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *r 0x00000001); tp->last_tag = sblk->status_tag; rmb(); + if (tg3_irq_sync(tp)) + goto out; sblk->status &= ~SD_STATUS_UPDATED; if (likely(tg3_has_work(tp))) netif_rx_schedule(dev); /* schedule NAPI poll */ @@ -3068,9 +3085,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *r } else { /* shared interrupt */ handled = 0; } - - spin_unlock_irqrestore(&tp->lock, flags); - +out: return IRQ_RETVAL(handled); } @@ -3109,8 +3124,7 @@ static void tg3_reset_task(void *_data) tg3_netif_stop(tp); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 1); restart_timer = tp->tg3_flags2 & TG3_FLG2_RESTART_TIMER; tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER; @@ -3120,8 +3134,7 @@ static void tg3_reset_task(void *_data) tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); if (restart_timer) mod_timer(&tp->timer, jiffies + 1); @@ -3227,39 +3240,21 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int i; u32 len, entry, base_flags, mss; int would_hit_hwbug; - unsigned long flags; len = skb_headlen(skb); /* No BH disabling for tx_lock here. We are running in BH disabled * context and TX reclaim runs via tp->poll inside of a software - * interrupt. Rejoice! - * - * Actually, things are not so simple. If we are to take a hw - * IRQ here, we can deadlock, consider: - * - * CPU1 CPU2 - * tg3_start_xmit - * take tp->tx_lock - * tg3_timer - * take tp->lock - * tg3_interrupt - * spin on tp->lock - * spin on tp->tx_lock - * - * So we really do need to disable interrupts when taking - * tx_lock here. + * interrupt. Furthermore, IRQ processing runs lockless so we have + * no IRQ context deadlocks to worry about either. Rejoice! */ - local_irq_save(flags); - if (!spin_trylock(&tp->tx_lock)) { - local_irq_restore(flags); + if (!spin_trylock(&tp->tx_lock)) return NETDEV_TX_LOCKED; - } /* This is a hard error, log it. */ if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { netif_stop_queue(dev); - spin_unlock_irqrestore(&tp->tx_lock, flags); + spin_unlock(&tp->tx_lock); printk(KERN_ERR PFX "%s: BUG! 
Tx Ring full when queue awake!\n", dev->name); return NETDEV_TX_BUSY; @@ -3424,7 +3419,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) out_unlock: mmiowb(); - spin_unlock_irqrestore(&tp->tx_lock, flags); + spin_unlock(&tp->tx_lock); dev->trans_start = jiffies; @@ -3458,8 +3453,8 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) } tg3_netif_stop(tp); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + + tg3_full_lock(tp, 1); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); @@ -3469,8 +3464,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); return 0; } @@ -5091,9 +5085,9 @@ static int tg3_set_mac_addr(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - spin_lock_irq(&tp->lock); + spin_lock_bh(&tp->lock); __tg3_set_mac_addr(tp); - spin_unlock_irq(&tp->lock); + spin_unlock_bh(&tp->lock); return 0; } @@ -5805,10 +5799,8 @@ static void tg3_periodic_fetch_stats(struct tg3 *tp) static void tg3_timer(unsigned long __opaque) { struct tg3 *tp = (struct tg3 *) __opaque; - unsigned long flags; - spin_lock_irqsave(&tp->lock, flags); - spin_lock(&tp->tx_lock); + spin_lock(&tp->lock); if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)) { /* All of this garbage is because when using non-tagged @@ -5825,8 +5817,7 @@ static void tg3_timer(unsigned long __opaque) if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER; - spin_unlock(&tp->tx_lock); - spin_unlock_irqrestore(&tp->lock, flags); + spin_unlock(&tp->lock); schedule_work(&tp->reset_task); return; } @@ -5894,8 +5885,7 @@ static void tg3_timer(unsigned long __opaque) tp->asf_counter = tp->asf_multiplier; } - spin_unlock(&tp->tx_lock); - spin_unlock_irqrestore(&tp->lock, flags); + spin_unlock(&tp->lock); tp->timer.expires = jiffies + tp->timer_offset; add_timer(&tp->timer); @@ -6010,14 +6000,12 @@ static int tg3_test_msi(struct tg3 *tp) /* Need to reset the chip because the MSI cycle may have terminated * with Master Abort. */ - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 1); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); err = tg3_init_hw(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); if (err) free_irq(tp->pdev->irq, dev); @@ -6030,14 +6018,12 @@ static int tg3_open(struct net_device *dev) struct tg3 *tp = netdev_priv(dev); int err; - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); tg3_disable_ints(tp); tp->tg3_flags &= ~TG3_FLAG_INIT_COMPLETE; - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); /* The placement of this call is tied * to the setup and use of Host TX descriptors. 
@@ -6084,8 +6070,7 @@ static int tg3_open(struct net_device *dev) return err; } - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); err = tg3_init_hw(tp); if (err) { @@ -6109,8 +6094,7 @@ static int tg3_open(struct net_device *dev) tp->timer.function = tg3_timer; } - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); if (err) { free_irq(tp->pdev->irq, dev); @@ -6126,8 +6110,7 @@ static int tg3_open(struct net_device *dev) err = tg3_test_msi(tp); if (err) { - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) { pci_disable_msi(tp->pdev); @@ -6137,22 +6120,19 @@ static int tg3_open(struct net_device *dev) tg3_free_rings(tp); tg3_free_consistent(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); return err; } } - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); add_timer(&tp->timer); tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE; tg3_enable_ints(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); netif_start_queue(dev); @@ -6398,8 +6378,7 @@ static int tg3_close(struct net_device *dev) del_timer_sync(&tp->timer); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 1); #if 0 tg3_dump_state(tp); #endif @@ -6413,8 +6392,7 @@ static int tg3_close(struct net_device *dev) TG3_FLAG_GOT_SERDES_FLOWCTL); netif_carrier_off(tp->dev); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); free_irq(tp->pdev->irq, dev); if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) { @@ -6451,16 +6429,15 @@ static unsigned long calc_crc_errors(struct tg3 *tp) if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) && (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701)) { - unsigned long flags; u32 val; - spin_lock_irqsave(&tp->lock, flags); + spin_lock_bh(&tp->lock); if (!tg3_readphy(tp, 0x1e, &val)) { tg3_writephy(tp, 0x1e, val | 0x8000); tg3_readphy(tp, 0x14, &val); } else val = 0; - spin_unlock_irqrestore(&tp->lock, flags); + spin_unlock_bh(&tp->lock); tp->phy_crc_errors += val; @@ -6722,11 +6699,9 @@ static void tg3_set_rx_mode(struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); __tg3_set_rx_mode(dev); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); } #define TG3_REGDUMP_LEN (32 * 1024) @@ -6748,8 +6723,7 @@ static void tg3_get_regs(struct net_device *dev, memset(p, 0, TG3_REGDUMP_LEN); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); #define __GET_REG32(reg) (*(p)++ = tr32(reg)) #define GET_REG32_LOOP(base,len) \ @@ -6799,8 +6773,7 @@ do { p = (u32 *)(orig_p + (reg)); \ #undef GET_REG32_LOOP #undef GET_REG32_1 - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); } static int tg3_get_eeprom_len(struct net_device *dev) @@ -6976,8 +6949,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) return -EINVAL; } - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); tp->link_config.autoneg = cmd->autoneg; if (cmd->autoneg == AUTONEG_ENABLE) { @@ -6993,8 +6965,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (netif_running(dev)) tg3_setup_phy(tp, 1); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); return 0; } @@ -7030,12 +7001,12 
@@ static int tg3_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) !(tp->tg3_flags & TG3_FLAG_SERDES_WOL_CAP)) return -EINVAL; - spin_lock_irq(&tp->lock); + spin_lock_bh(&tp->lock); if (wol->wolopts & WAKE_MAGIC) tp->tg3_flags |= TG3_FLAG_WOL_ENABLE; else tp->tg3_flags &= ~TG3_FLAG_WOL_ENABLE; - spin_unlock_irq(&tp->lock); + spin_unlock_bh(&tp->lock); return 0; } @@ -7075,7 +7046,7 @@ static int tg3_nway_reset(struct net_device *dev) if (!netif_running(dev)) return -EAGAIN; - spin_lock_irq(&tp->lock); + spin_lock_bh(&tp->lock); r = -EINVAL; tg3_readphy(tp, MII_BMCR, &bmcr); if (!tg3_readphy(tp, MII_BMCR, &bmcr) && @@ -7083,7 +7054,7 @@ static int tg3_nway_reset(struct net_device *dev) tg3_writephy(tp, MII_BMCR, bmcr | BMCR_ANRESTART); r = 0; } - spin_unlock_irq(&tp->lock); + spin_unlock_bh(&tp->lock); return r; } @@ -7114,8 +7085,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if (netif_running(dev)) tg3_netif_stop(tp); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); tp->rx_pending = ering->rx_pending; @@ -7131,8 +7101,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e tg3_netif_start(tp); } - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); return 0; } @@ -7153,8 +7122,8 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam if (netif_running(dev)) tg3_netif_stop(tp); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 1); + if (epause->autoneg) tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG; else @@ -7173,8 +7142,8 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam tg3_init_hw(tp); tg3_netif_start(tp); } - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + + tg3_full_unlock(tp); return 0; } @@ -7195,12 +7164,12 @@ static int tg3_set_rx_csum(struct net_device *dev, u32 data) return 0; } - spin_lock_irq(&tp->lock); + spin_lock_bh(&tp->lock); if (data) tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS; else tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS; - spin_unlock_irq(&tp->lock); + spin_unlock_bh(&tp->lock); return 0; } @@ -7722,8 +7691,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, if (netif_running(dev)) tg3_netif_stop(tp); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 1); tg3_halt(tp, RESET_KIND_SUSPEND, 1); tg3_nvram_lock(tp); @@ -7745,14 +7713,14 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, data[4] = 1; } - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); + if (tg3_test_interrupt(tp) != 0) { etest->flags |= ETH_TEST_FL_FAILED; data[5] = 1; } - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + + tg3_full_lock(tp, 0); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); if (netif_running(dev)) { @@ -7760,8 +7728,8 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, tg3_init_hw(tp); tg3_netif_start(tp); } - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + + tg3_full_unlock(tp); } } @@ -7782,9 +7750,9 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) break; /* We have no PHY */ - spin_lock_irq(&tp->lock); + spin_lock_bh(&tp->lock); err = tg3_readphy(tp, data->reg_num & 0x1f, &mii_regval); - spin_unlock_irq(&tp->lock); + spin_unlock_bh(&tp->lock); data->val_out = mii_regval; @@ -7798,9 +7766,9 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq 
*ifr, int cmd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - spin_lock_irq(&tp->lock); + spin_lock_bh(&tp->lock); err = tg3_writephy(tp, data->reg_num & 0x1f, data->val_in); - spin_unlock_irq(&tp->lock); + spin_unlock_bh(&tp->lock); return err; @@ -7816,28 +7784,24 @@ static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) { struct tg3 *tp = netdev_priv(dev); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); tp->vlgrp = grp; /* Update RX_MODE_KEEP_VLAN_TAG bit in RX_MODE register. */ __tg3_set_rx_mode(dev); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); } static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) { struct tg3 *tp = netdev_priv(dev); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); if (tp->vlgrp) tp->vlgrp->vlan_devices[vid] = NULL; - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); } #endif @@ -10168,24 +10132,19 @@ static int tg3_suspend(struct pci_dev *pdev, pm_message_t state) del_timer_sync(&tp->timer); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 1); tg3_disable_ints(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); netif_device_detach(dev); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); err = tg3_set_power_state(tp, pci_choose_state(pdev, state)); if (err) { - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); tg3_init_hw(tp); @@ -10195,8 +10154,7 @@ static int tg3_suspend(struct pci_dev *pdev, pm_message_t state) netif_device_attach(dev); tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); } return err; @@ -10219,8 +10177,7 @@ static int tg3_resume(struct pci_dev *pdev) netif_device_attach(dev); - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + tg3_full_lock(tp, 0); tg3_init_hw(tp); @@ -10231,8 +10188,7 @@ static int tg3_resume(struct pci_dev *pdev) tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); + tg3_full_unlock(tp); return 0; } diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 993f84c93dc..99c5f9675a5 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2006,17 +2006,31 @@ struct tg3_ethtool_stats { struct tg3 { /* begin "general, frequently-used members" cacheline section */ + /* If the IRQ handler (which runs lockless) needs to be + * quiesced, the following bitmask state is used. The + * SYNC flag is set by non-IRQ context code to initiate + * the quiescence. + * + * When the IRQ handler notices that SYNC is set, it + * disables interrupts and returns. + * + * When all outstanding IRQ handlers have returned after + * the SYNC flag has been set, the setter can be assured + * that interrupts will no longer get run. + * + * In this way all SMP driver locks are never acquired + * in hw IRQ context, only sw IRQ context or lower. + */ + unsigned int irq_sync; + /* SMP locking strategy: * * lock: Held during all operations except TX packet * processing. * - * tx_lock: Held during tg3_start_xmit{,_4gbug} and tg3_tx + * tx_lock: Held during tg3_start_xmit and tg3_tx * - * If you want to shut up all asynchronous processing you must - * acquire both locks, 'lock' taken before 'tx_lock'. 
IRQs must - * be disabled to take 'lock' but only softirq disabling is - * necessary for acquisition of 'tx_lock'. + * Both of these locks are to be held with BH safety. */ spinlock_t lock; spinlock_t indirect_lock; -- cgit v1.2.3 From bbe832c09233738c100145fd535b6b8fc97640f6 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 24 Jun 2005 20:20:04 -0700 Subject: [TG3]: Refinements to new locking strategy. 1. Move tp->irq_sync = 0 to before the interrupt mailbox IO in tg3_enable_ints() so that the interrupt handler will always see irq_sync == 0 when interrupts are enabled. 2. Remove the tg3_enable_ints() call in tg3_reset_hw(). Interrupts are always enabled explicitly or through tg3_netif_start(). This is to prevent interrupts being enabled while poll is disabled. 3. Update trans_start with jiffies in tg3_netif_stop() to prevent false NETDEV WATCHDOG. 4. Pass in the proper irq_sync parameter to tg3_full_lock() depending on netif_running() in some of the ethtool set calls. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 8b8aa2ad578..04e747aa1a8 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -430,12 +430,14 @@ static inline void tg3_cond_int(struct tg3 *tp) static void tg3_enable_ints(struct tg3 *tp) { + tp->irq_sync = 0; + wmb(); + tw32(TG3PCI_MISC_HOST_CTRL, (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, (tp->last_tag << 24)); tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); - tp->irq_sync = 0; tg3_cond_int(tp); } @@ -484,6 +486,7 @@ static void tg3_restart_ints(struct tg3 *tp) static inline void tg3_netif_stop(struct tg3 *tp) { + tp->dev->trans_start = jiffies; /* prevent tx timeout */ netif_poll_disable(tp->dev); netif_tx_disable(tp->dev); } @@ -5724,9 +5727,6 @@ static int tg3_reset_hw(struct tg3 *tp) tg3_write_sig_post_reset(tp, RESET_KIND_INIT); - if (tp->tg3_flags & TG3_FLAG_INIT_COMPLETE) - tg3_enable_ints(tp); - return 0; } @@ -7076,16 +7076,19 @@ static void tg3_get_ringparam(struct net_device *dev, struct ethtool_ringparam * static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ering) { struct tg3 *tp = netdev_priv(dev); + int irq_sync = 0; if ((ering->rx_pending > TG3_RX_RING_SIZE - 1) || (ering->rx_jumbo_pending > TG3_RX_JUMBO_RING_SIZE - 1) || (ering->tx_pending > TG3_TX_RING_SIZE - 1)) return -EINVAL; - if (netif_running(dev)) + if (netif_running(dev)) { tg3_netif_stop(tp); + irq_sync = 1; + } - tg3_full_lock(tp, 0); + tg3_full_lock(tp, irq_sync); tp->rx_pending = ering->rx_pending; @@ -7118,11 +7121,14 @@ static void tg3_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) { struct tg3 *tp = netdev_priv(dev); + int irq_sync = 0; - if (netif_running(dev)) + if (netif_running(dev)) { tg3_netif_stop(tp); + irq_sync = 1; + } - tg3_full_lock(tp, 1); + tg3_full_lock(tp, irq_sync); if (epause->autoneg) tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG; @@ -7578,8 +7584,6 @@ static int tg3_test_loopback(struct tg3 *tp) tg3_abort_hw(tp, 1); - /* Clearing this flag to keep interrupts disabled */ - tp->tg3_flags &= ~TG3_FLAG_INIT_COMPLETE; tg3_reset_hw(tp); mac_mode = (tp->mac_mode & ~MAC_MODE_PORT_MODE_MASK) | @@ -7688,10 +7692,14 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, data[1] = 1; 
} if (etest->flags & ETH_TEST_FL_OFFLINE) { - if (netif_running(dev)) + int irq_sync = 0; + + if (netif_running(dev)) { tg3_netif_stop(tp); + irq_sync = 1; + } - tg3_full_lock(tp, 1); + tg3_full_lock(tp, irq_sync); tg3_halt(tp, RESET_KIND_SUSPEND, 1); tg3_nvram_lock(tp); @@ -10184,8 +10192,6 @@ static int tg3_resume(struct pci_dev *pdev) tp->timer.expires = jiffies + tp->timer_offset; add_timer(&tp->timer); - tg3_enable_ints(tp); - tg3_netif_start(tp); tg3_full_unlock(tp); -- cgit v1.2.3 From 5f70eaa0d5768775a7492f3e3841fcca94bb0d13 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2005 20:21:01 -0700 Subject: [TG3]: Update driver version and reldate. Signed-off-by: David S. Miller --- drivers/net/tg3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 04e747aa1a8..7e371b1209a 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -66,8 +66,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.31" -#define DRV_MODULE_RELDATE "June 8, 2005" +#define DRV_MODULE_VERSION "3.32" +#define DRV_MODULE_RELDATE "June 24, 2005" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 -- cgit v1.2.3 From 456b6b863a6a1a80e369e42568817747a427b072 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 25 Jun 2005 16:58:21 +0100 Subject: [PATCH] ARM: 2749/1: update ixp2000 defconfigs to 2.6.12-git6 Patch from Lennert Buytenhek Update the defconfigs for the ixp2000 platforms to 2.6.12-git6. Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- arch/arm/configs/enp2611_defconfig | 58 +++++++++++++++++++++++++++++++------ arch/arm/configs/ixdp2400_defconfig | 58 +++++++++++++++++++++++++++++++------ arch/arm/configs/ixdp2401_defconfig | 58 +++++++++++++++++++++++++++++++------ arch/arm/configs/ixdp2800_defconfig | 58 +++++++++++++++++++++++++++++++------ arch/arm/configs/ixdp2801_defconfig | 58 +++++++++++++++++++++++++++++++------ 5 files changed, 245 insertions(+), 45 deletions(-) diff --git a/arch/arm/configs/enp2611_defconfig b/arch/arm/configs/enp2611_defconfig index 06fae4b6277..af215342456 100644 --- a/arch/arm/configs/enp2611_defconfig +++ b/arch/arm/configs/enp2611_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 22:08:24 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 00:57:29 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -106,7 +109,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -118,9 +120,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -130,7 
+134,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -269,7 +281,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -308,6 +319,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -329,10 +341,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -349,6 +362,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -404,6 +428,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -440,9 +465,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -464,6 +491,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -526,7 +554,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -547,6 +574,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -613,17 +641,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -637,7 +666,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -653,6 +684,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # 
CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -662,14 +694,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -723,6 +760,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -763,7 +801,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -801,12 +838,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -891,3 +930,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2400_defconfig b/arch/arm/configs/ixdp2400_defconfig index 810a450a55d..a7ee1a442f3 100644 --- a/arch/arm/configs/ixdp2400_defconfig +++ b/arch/arm/configs/ixdp2400_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 21:13:38 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 00:58:38 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -107,7 +110,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +121,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +135,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y 
+CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +282,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +320,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -330,10 +342,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -350,6 +363,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +429,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -441,9 +466,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -465,6 +492,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -527,7 +555,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -548,6 +575,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -614,17 +642,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -638,7 +667,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -654,6 +685,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -663,14 +695,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not 
set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -724,6 +761,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -764,7 +802,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -802,12 +839,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -892,3 +931,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2401_defconfig b/arch/arm/configs/ixdp2401_defconfig index 72e1b940e97..2da48fca5c1 100644 --- a/arch/arm/configs/ixdp2401_defconfig +++ b/arch/arm/configs/ixdp2401_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 21:53:55 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 00:59:35 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -107,7 +110,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +121,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +135,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +282,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +320,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION 
is not set # # IEEE 1394 (FireWire) support @@ -330,10 +342,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -350,6 +363,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set CONFIG_IP_TCPDIAG=y # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +429,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -442,9 +467,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -466,6 +493,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -528,7 +556,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -549,6 +576,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -615,17 +643,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -639,7 +668,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -655,6 +686,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -664,14 +696,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set 
+# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -725,6 +762,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -765,7 +803,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -803,12 +840,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -893,3 +932,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2800_defconfig b/arch/arm/configs/ixdp2800_defconfig index 1592e45f027..7ba867f751f 100644 --- a/arch/arm/configs/ixdp2800_defconfig +++ b/arch/arm/configs/ixdp2800_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 22:15:23 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 01:00:27 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -107,7 +110,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +121,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +135,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +282,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +320,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -330,10 +342,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y 
@@ -350,6 +363,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +429,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -441,9 +466,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -465,6 +492,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -527,7 +555,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -548,6 +575,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -614,17 +642,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -638,7 +667,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -654,6 +685,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -663,14 +695,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -724,6 +761,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y 
CONFIG_EXT3_FS_POSIX_ACL=y @@ -764,7 +802,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -802,12 +839,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -892,3 +931,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2801_defconfig b/arch/arm/configs/ixdp2801_defconfig index f1afe3d09ec..c4df0ec34b0 100644 --- a/arch/arm/configs/ixdp2801_defconfig +++ b/arch/arm/configs/ixdp2801_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 22:39:19 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 01:01:18 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -107,7 +110,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +121,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +135,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +282,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +320,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -330,10 +342,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -350,6 +363,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# 
CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +429,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -442,9 +467,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -466,6 +493,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -528,7 +556,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -549,6 +576,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -615,17 +643,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -639,7 +668,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -655,6 +686,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -664,14 +696,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -725,6 +762,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -765,7 +803,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -803,12 +840,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # 
CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -893,3 +932,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set -- cgit v1.2.3 From ea23d1ac7e380aefc00d691c7c9a3e747bc2531f Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 25 Jun 2005 16:58:21 +0100 Subject: [PATCH] ARM: 2750/1: add i2c platform device for enp2611 on-board i2c bus Patch from Lennert Buytenhek On the enp2611, GPIO 7 and 6 are connected to an on-board i2c bus that attaches to the SODIMM module slot (for SPD) and an LM84 temperature sensor. Add a platform device for this i2c bus. Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- arch/arm/mach-ixp2000/enp2611.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-ixp2000/enp2611.c b/arch/arm/mach-ixp2000/enp2611.c index 04b38bcf9aa..f3a291b6a9f 100644 --- a/arch/arm/mach-ixp2000/enp2611.c +++ b/arch/arm/mach-ixp2000/enp2611.c @@ -197,8 +197,23 @@ static struct platform_device enp2611_flash = { .resource = &enp2611_flash_resource, }; +static struct ixp2000_i2c_pins enp2611_i2c_gpio_pins = { + .sda_pin = ENP2611_GPIO_SDA, + .scl_pin = ENP2611_GPIO_SCL, +}; + +static struct platform_device enp2611_i2c_controller = { + .name = "IXP2000-I2C", + .id = 0, + .dev = { + .platform_data = &enp2611_i2c_gpio_pins + }, + .num_resources = 0 +}; + static struct platform_device *enp2611_devices[] __initdata = { - &enp2611_flash + &enp2611_flash, + &enp2611_i2c_controller }; static void __init enp2611_init_machine(void) -- cgit v1.2.3 From 8144f56bd1e49015f94b8da99b24c4d04643b66d Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 25 Jun 2005 16:58:22 +0100 Subject: [PATCH] ARM: 2751/1: ixp2000 gpio cleanup broke ixdp2800 build Patch from Lennert Buytenhek The ixp2000 gpio cleanup broke the ixdp2800 build as it moved some gpio-related functions from arch/platform.h to arch/gpio.h and the ixdp2x00 support code used those functions but didn't include the latter header file. Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- arch/arm/mach-ixp2000/ixdp2x00.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c index 21c41fe15b9..5e4380747b5 100644 --- a/arch/arm/mach-ixp2000/ixdp2x00.c +++ b/arch/arm/mach-ixp2000/ixdp2x00.c @@ -42,6 +42,9 @@ #include #include +#include + + /************************************************************************* * IXDP2x00 IRQ Initialization *************************************************************************/ -- cgit v1.2.3 From 7be426c6e3a8ad7dcc8791589cea8af7aaafdf6f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 25 Jun 2005 10:01:36 -0700 Subject: ACPI: Make sure we call acpi_register_gsi() even for default PCI interrupt assignment That's the part that keeps track of the ELCR register, and we want to make sure that the PCI interrupts are properly marked level/low. 
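For background, an illustrative sketch (not code from this patch): the ELCR is a pair of legacy registers at I/O ports 0x4d0/0x4d1, one bit per IRQ, where a set bit marks the line as level-triggered. Registering the GSI as level/low is what lets the interrupt code set that bit for a BIOS-assigned PCI interrupt; the operation boils down to roughly the following, where the example_ name is hypothetical:

	/*
	 * Illustration only -- in the kernel this is handled by helpers
	 * such as eisa_set_level_irq(); it is not part of this patch.
	 */
	static void example_mark_irq_level(unsigned int irq)
	{
		unsigned int port = 0x4d0 + (irq >> 3);

		outb(inb(port) | (1 << (irq & 7)), port);
	}
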
--- drivers/acpi/pci_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 8093f2e0032..8dbf802ee7f 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -435,6 +435,7 @@ acpi_pci_irq_enable ( /* Interrupt Line values above 0xF are forbidden */ if (dev->irq >= 0 && (dev->irq <= 0xF)) { printk(" - using IRQ %d\n", dev->irq); + acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); return_VALUE(0); } else { -- cgit v1.2.3 From 3cd9e19ebc91593c9f076410d6f979be188f01a0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 25 Jun 2005 19:29:34 +0100 Subject: [PATCH] ARM: Fix discontigmem The merge of sparsemem broke ARM discontigmem. Fix it. Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- arch/arm/mach-clps711x/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 07ba77c19f6..bfcf4228036 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -157,7 +157,7 @@ config ARCH_RPC config ARCH_SA1100 bool "SA1100-based" select ISA - select DISCONTIGMEM + select ARCH_DISCONTIGMEM_ENABLE config ARCH_S3C2410 bool "Samsung S3C2410" diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig index 45c930ccd06..0793dcf54f2 100644 --- a/arch/arm/mach-clps711x/Kconfig +++ b/arch/arm/mach-clps711x/Kconfig @@ -28,7 +28,7 @@ config ARCH_CLEP7312 config ARCH_EDB7211 bool "EDB7211" select ISA - select DISCONTIGMEM + select ARCH_DISCONTIGMEM_ENABLE help Say Y here if you intend to run this kernel on a Cirrus Logic EDB-7211 evaluation board. -- cgit v1.2.3 From 321ab6a5fab812658626aee6bce2617f8cfb3a55 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 25 Jun 2005 19:30:04 +0100 Subject: [PATCH] ARM: 2752/1: disable ixp2000 PCI I/O software workaround on chips that don't need it Patch from Lennert Buytenhek The later ixp2000 models don't need the PCI I/O workaround that we currently perform. Add a config option to disable the workaround, and panic on boot if a kernel without the workaround is booted on a buggy chip. As only pre-production ixp2000s need the workaround, the default is for it not to be configured in. 
Signed-off-by: Lennert Buytenhek Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- arch/arm/configs/enp2611_defconfig | 1 + arch/arm/configs/ixdp2400_defconfig | 1 + arch/arm/configs/ixdp2401_defconfig | 1 + arch/arm/configs/ixdp2800_defconfig | 1 + arch/arm/configs/ixdp2801_defconfig | 1 + arch/arm/mach-ixp2000/Kconfig | 8 ++++++++ arch/arm/mach-ixp2000/pci.c | 13 +++++++++++++ include/asm-arm/arch-ixp2000/io.h | 20 ++++++++++++++------ include/asm-arm/arch-ixp2000/ixp2000-regs.h | 2 +- 9 files changed, 41 insertions(+), 7 deletions(-) diff --git a/arch/arm/configs/enp2611_defconfig b/arch/arm/configs/enp2611_defconfig index af215342456..b8c51ee7f1b 100644 --- a/arch/arm/configs/enp2611_defconfig +++ b/arch/arm/configs/enp2611_defconfig @@ -99,6 +99,7 @@ CONFIG_ARCH_ENP2611=y # CONFIG_ARCH_IXDP2800 is not set # CONFIG_ARCH_IXDP2401 is not set # CONFIG_ARCH_IXDP2801 is not set +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type diff --git a/arch/arm/configs/ixdp2400_defconfig b/arch/arm/configs/ixdp2400_defconfig index a7ee1a442f3..3cfbe2ec29c 100644 --- a/arch/arm/configs/ixdp2400_defconfig +++ b/arch/arm/configs/ixdp2400_defconfig @@ -100,6 +100,7 @@ CONFIG_ARCH_IXDP2400=y CONFIG_ARCH_IXDP2X00=y # CONFIG_ARCH_IXDP2401 is not set # CONFIG_ARCH_IXDP2801 is not set +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type diff --git a/arch/arm/configs/ixdp2401_defconfig b/arch/arm/configs/ixdp2401_defconfig index 2da48fca5c1..5c87e8e6969 100644 --- a/arch/arm/configs/ixdp2401_defconfig +++ b/arch/arm/configs/ixdp2401_defconfig @@ -100,6 +100,7 @@ CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y CONFIG_ARCH_IXDP2401=y # CONFIG_ARCH_IXDP2801 is not set CONFIG_ARCH_IXDP2X01=y +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type diff --git a/arch/arm/configs/ixdp2800_defconfig b/arch/arm/configs/ixdp2800_defconfig index 7ba867f751f..3cb561a551c 100644 --- a/arch/arm/configs/ixdp2800_defconfig +++ b/arch/arm/configs/ixdp2800_defconfig @@ -100,6 +100,7 @@ CONFIG_ARCH_IXDP2800=y CONFIG_ARCH_IXDP2X00=y # CONFIG_ARCH_IXDP2401 is not set # CONFIG_ARCH_IXDP2801 is not set +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type diff --git a/arch/arm/configs/ixdp2801_defconfig b/arch/arm/configs/ixdp2801_defconfig index c4df0ec34b0..b1e162f29cb 100644 --- a/arch/arm/configs/ixdp2801_defconfig +++ b/arch/arm/configs/ixdp2801_defconfig @@ -100,6 +100,7 @@ CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # CONFIG_ARCH_IXDP2401 is not set CONFIG_ARCH_IXDP2801=y CONFIG_ARCH_IXDP2X01=y +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type diff --git a/arch/arm/mach-ixp2000/Kconfig b/arch/arm/mach-ixp2000/Kconfig index 9361e05f6fa..ecb58d83478 100644 --- a/arch/arm/mach-ixp2000/Kconfig +++ b/arch/arm/mach-ixp2000/Kconfig @@ -54,6 +54,14 @@ config ARCH_IXDP2X01 depends on ARCH_IXDP2401 || ARCH_IXDP2801 default y +config IXP2000_SUPPORT_BROKEN_PCI_IO + bool "Support broken PCI I/O on older IXP2000s" + default y + help + Say 'N' here if you only intend to run your kernel on an + IXP2000 B0 or later model and do not need the PCI I/O + byteswap workaround. Say 'Y' otherwise. 
+ endmenu endif diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c index 5ff2f2718c5..0788fb2b5c1 100644 --- a/arch/arm/mach-ixp2000/pci.c +++ b/arch/arm/mach-ixp2000/pci.c @@ -198,6 +198,19 @@ clear_master_aborts(void) void __init ixp2000_pci_preinit(void) { +#ifndef CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO + /* + * Configure the PCI unit to properly byteswap I/O transactions, + * and verify that it worked. + */ + ixp2000_reg_write(IXP2000_PCI_CONTROL, + (*IXP2000_PCI_CONTROL | PCI_CONTROL_IEE)); + + if ((*IXP2000_PCI_CONTROL & PCI_CONTROL_IEE) == 0) + panic("IXP2000: PCI I/O is broken on this ixp model, and " + "the needed workaround has not been configured in"); +#endif + hook_fault_code(16+6, ixp2000_pci_abort_handler, SIGBUS, "PCI config cycle to non-existent device"); } diff --git a/include/asm-arm/arch-ixp2000/io.h b/include/asm-arm/arch-ixp2000/io.h index 5e56b47446e..3241cd6f077 100644 --- a/include/asm-arm/arch-ixp2000/io.h +++ b/include/asm-arm/arch-ixp2000/io.h @@ -17,16 +17,21 @@ #define IO_SPACE_LIMIT 0xffffffff #define __mem_pci(a) (a) -#define ___io(p) ((void __iomem *)((p)+IXP2000_PCI_IO_VIRT_BASE)) /* - * The IXP2400 before revision B0 asserts byte lanes for PCI I/O + * The A? revisions of the IXP2000s assert byte lanes for PCI I/O * transactions the other way round (MEM transactions don't have this - * issue), so we need to override the standard functions. B0 and later - * have a bit that can be set to 1 to get the 'proper' behavior, but - * since that isn't available on the A? revisions we just keep doing - * things manually. + * issue), so if we want to support those models, we need to override + * the standard I/O functions. + * + * B0 and later have a bit that can be set to 1 to get the proper + * behavior for I/O transactions, which then allows us to use the + * standard I/O functions. This is what we do if the user does not + * explicitly ask for support for pre-B0. */ +#ifdef CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO +#define ___io(p) ((void __iomem *)((p)+IXP2000_PCI_IO_VIRT_BASE)) + #define alignb(addr) (void __iomem *)((unsigned long)(addr) ^ 3) #define alignw(addr) (void __iomem *)((unsigned long)(addr) ^ 2) @@ -119,6 +124,9 @@ #define ioport_map(port, nr) ___io(port) #define ioport_unmap(addr) +#else +#define __io(p) ((void __iomem *)((p)+IXP2000_PCI_IO_VIRT_BASE)) +#endif #ifdef CONFIG_ARCH_IXDP2X01 diff --git a/include/asm-arm/arch-ixp2000/ixp2000-regs.h b/include/asm-arm/arch-ixp2000/ixp2000-regs.h index a1d9e181b10..5eb47d4bfbf 100644 --- a/include/asm-arm/arch-ixp2000/ixp2000-regs.h +++ b/include/asm-arm/arch-ixp2000/ixp2000-regs.h @@ -241,7 +241,7 @@ #define PCI_CONTROL_BE_DEI (1 << 21) /* Big Endian Data Enable In */ #define PCI_CONTROL_BE_BEO (1 << 20) /* Big Endian Byte Enable Out */ #define PCI_CONTROL_BE_BEI (1 << 19) /* Big Endian Byte Enable In */ -#define PCI_CONTROL_PNR (1 << 17) /* PCI Not Reset bit */ +#define PCI_CONTROL_IEE (1 << 17) /* I/O cycle Endian swap Enable */ #define IXP2000_PCI_RST_REL (1 << 2) #define CFG_RST_DIR (*IXP2000_PCI_CONTROL & IXP2000_PCICNTL_PCF) -- cgit v1.2.3 From 8749af68216e1ebf6460992fce548f400ecf63a4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 25 Jun 2005 19:39:45 +0100 Subject: [PATCH] ARM: Generic Dynamic Tick Timer support for ARM, take 4 This patch adds support for Dynamic Tick Timer for ARM. Dynamic Tick is also known as VST (Variable Scheduling Timeouts). Dynamic Tick has been in use in the OMAP tree since last October. 
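To make the new hooks concrete, here is a rough sketch of how a platform timer might fill them in; all example_ names are hypothetical and not part of this patch, and a real port would program its timer hardware in the three callbacks:

	#ifdef CONFIG_NO_IDLE_HZ
	static int example_dyn_tick_enable(void)
	{
		/* switch the hardware timer from periodic to one-shot mode */
		return 0;
	}

	static int example_dyn_tick_disable(void)
	{
		/* restore the normal periodic HZ tick */
		return 0;
	}

	static void example_dyn_tick_reprogram(unsigned long ticks)
	{
		/* arm a one-shot interrupt 'ticks' jiffies into the future */
	}

	/* the platform's existing tick ISR, defined elsewhere */
	static int example_timer_interrupt(int irq, void *dev_id,
					   struct pt_regs *regs);

	static struct dyn_tick_timer example_dyn_tick = {
		.enable    = example_dyn_tick_enable,
		.disable   = example_dyn_tick_disable,
		.reprogram = example_dyn_tick_reprogram,
		.handler   = example_timer_interrupt,
	};

	/*
	 * hooked up through the platform's struct sys_timer:
	 *	example_sys_timer.dyn_tick = &example_dyn_tick;
	 */
	#endif

Once registered, dynamic tick can be toggled at run time through the dyn_tick sysfs attribute this patch adds, or enabled at boot with dyntick=enable.
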
The patch is not intrusive, and does not do anything unless CONFIG_NO_IDLE_HZ is defined. This patch has the following fixed based on comments from RMK: - Time is updated before calling interrupt handlers. - Added new interrupt flag SA_TIMER to avoid duplicate timer interrupts - Moved struct dyn_tick_timer to time.h until we at some point probably have an arch independent dyn-tick.h - Cleaned up testing for DYN_TICK_ENABLED in irq.c I've cleaned up this patch to fix some remaining issues: - Call the timer tick handler with irqs disabled, as it would be from a normal interrupt - if we have a dyn_tick, we better implement all methods. - generic timer_dyn_reprogram() call, to be called before sleeping - added command line option - "dyntick=" to allow boot-time control of this feature -- rmk Signed-off-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/Kconfig | 15 +++++++ arch/arm/kernel/irq.c | 14 ++++++ arch/arm/kernel/time.c | 103 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-arm/mach/time.h | 21 +++++++++ include/asm-arm/signal.h | 1 + 5 files changed, 154 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index bfcf4228036..c8d94dcd8ef 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -346,6 +346,21 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +config NO_IDLE_HZ + bool "Dynamic tick timer" + help + Select this option if you want to disable continuous timer ticks + and have them programmed to occur as required. This option saves + power as the system can remain in idle state for longer. + + By default dynamic tick is disabled during the boot, and can be + manually enabled with: + + echo 1 > /sys/devices/system/timer/timer0/dyn_tick + + Alternatively, if you want dynamic tick automatically enabled + during boot, pass "dyntick=enable" via the kernel command string. + config ARCH_DISCONTIGMEM_ENABLE bool default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index ff187f4308f..395137a8fad 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -4,6 +4,10 @@ * Copyright (C) 1992 Linus Torvalds * Modifications for ARM processor Copyright (C) 1995-2000 Russell King. * + * Support for Dynamic Tick Timer Copyright (C) 2004-2005 Nokia Corporation. + * Dynamic Tick Timer written by Tony Lindgren and + * Tuukka Tikkanen . + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -37,6 +41,7 @@ #include #include #include +#include /* * Maximum IRQ count. Currently, this is arbitary. 
However, it should @@ -329,6 +334,15 @@ __do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs) spin_unlock(&irq_controller_lock); +#ifdef CONFIG_NO_IDLE_HZ + if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) { + write_seqlock(&xtime_lock); + if (system_timer->dyn_tick->state & DYN_TICK_ENABLED) + system_timer->dyn_tick->handler(irq, 0, regs); + write_sequnlock(&xtime_lock); + } +#endif + if (!(action->flags & SA_INTERRUPT)) local_irq_enable(); diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index c232f24f4a6..06054c9ba07 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -381,6 +381,95 @@ static struct sysdev_class timer_sysclass = { .resume = timer_resume, }; +#ifdef CONFIG_NO_IDLE_HZ +static int timer_dyn_tick_enable(void) +{ + struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; + unsigned long flags; + int ret = -ENODEV; + + if (dyn_tick) { + write_seqlock_irqsave(&xtime_lock, flags); + ret = 0; + if (!(dyn_tick->state & DYN_TICK_ENABLED)) { + ret = dyn_tick->enable(); + + if (ret == 0) + dyn_tick->state |= DYN_TICK_ENABLED; + } + write_sequnlock_irqrestore(&xtime_lock, flags); + } + + return ret; +} + +static int timer_dyn_tick_disable(void) +{ + struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; + unsigned long flags; + int ret = -ENODEV; + + if (dyn_tick) { + write_seqlock_irqsave(&xtime_lock, flags); + ret = 0; + if (dyn_tick->state & DYN_TICK_ENABLED) { + ret = dyn_tick->disable(); + + if (ret == 0) + dyn_tick->state &= ~DYN_TICK_ENABLED; + } + write_sequnlock_irqrestore(&xtime_lock, flags); + } + + return ret; +} + +void timer_dyn_reprogram(void) +{ + struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; + unsigned long flags; + + write_seqlock_irqsave(&xtime_lock, flags); + if (dyn_tick->state & DYN_TICK_ENABLED) + dyn_tick->reprogram(next_timer_interrupt() - jiffies); + write_sequnlock_irqrestore(&xtime_lock, flags); +} + +static ssize_t timer_show_dyn_tick(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "%i\n", + (system_timer->dyn_tick->state & DYN_TICK_ENABLED) >> 1); +} + +static ssize_t timer_set_dyn_tick(struct sys_device *dev, const char *buf, + size_t count) +{ + unsigned int enable = simple_strtoul(buf, NULL, 2); + + if (enable) + timer_dyn_tick_enable(); + else + timer_dyn_tick_disable(); + + return count; +} +static SYSDEV_ATTR(dyn_tick, 0644, timer_show_dyn_tick, timer_set_dyn_tick); + +/* + * dyntick=enable|disable + */ +static char dyntick_str[4] __initdata = ""; + +static int __init dyntick_setup(char *str) +{ + if (str) + strlcpy(dyntick_str, str, sizeof(dyntick_str)); + return 1; +} + +__setup("dyntick=", dyntick_setup); +#endif + static int __init timer_init_sysfs(void) { int ret = sysdev_class_register(&timer_sysclass); @@ -388,6 +477,20 @@ static int __init timer_init_sysfs(void) system_timer->dev.cls = &timer_sysclass; ret = sysdev_register(&system_timer->dev); } + +#ifdef CONFIG_NO_IDLE_HZ + if (ret == 0 && system_timer->dyn_tick) { + ret = sysdev_create_file(&system_timer->dev, &attr_dyn_tick); + + /* + * Turn on dynamic tick after calibrate delay + * for correct bogomips + */ + if (ret == 0 && dyntick_str[0] == 'e') + ret = timer_dyn_tick_enable(); + } +#endif + return ret; } diff --git a/include/asm-arm/mach/time.h b/include/asm-arm/mach/time.h index 5cf4fd659fd..047980ad18d 100644 --- a/include/asm-arm/mach/time.h +++ b/include/asm-arm/mach/time.h @@ -39,8 +39,29 @@ struct sys_timer { void (*suspend)(void); void (*resume)(void); unsigned long (*offset)(void); 
+ +#ifdef CONFIG_NO_IDLE_HZ + struct dyn_tick_timer *dyn_tick; +#endif +}; + +#ifdef CONFIG_NO_IDLE_HZ + +#define DYN_TICK_SKIPPING (1 << 2) +#define DYN_TICK_ENABLED (1 << 1) +#define DYN_TICK_SUITABLE (1 << 0) + +struct dyn_tick_timer { + unsigned int state; /* Current state */ + int (*enable)(void); /* Enables dynamic tick */ + int (*disable)(void); /* Disables dynamic tick */ + void (*reprogram)(unsigned long); /* Reprograms the timer */ + int (*handler)(int, void *, struct pt_regs *); }; +void timer_dyn_reprogram(void); +#endif + extern struct sys_timer *system_timer; extern void timer_tick(struct pt_regs *); diff --git a/include/asm-arm/signal.h b/include/asm-arm/signal.h index 46e69ae395a..760f6e65af0 100644 --- a/include/asm-arm/signal.h +++ b/include/asm-arm/signal.h @@ -114,6 +114,7 @@ typedef unsigned long sigset_t; #define SIGSTKSZ 8192 #ifdef __KERNEL__ +#define SA_TIMER 0x40000000 #define SA_IRQNOMASK 0x08000000 #endif -- cgit v1.2.3 From b659980014a7f45bfd6a1da9039a0498a28b7a63 Mon Sep 17 00:00:00 2001 From: Uwe Bugla Date: Sat, 25 Jun 2005 14:54:21 -0700 Subject: [PATCH] dvb-bt8xx.txt fixes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dvb/bt8xx.txt | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/Documentation/dvb/bt8xx.txt b/Documentation/dvb/bt8xx.txt index d64430bf4bb..3a326079475 100644 --- a/Documentation/dvb/bt8xx.txt +++ b/Documentation/dvb/bt8xx.txt @@ -44,26 +44,23 @@ TwinHan (dst) are loaded automatically by the dvb-bt8xx device driver. $ modprobe dst The value 0x71 will override the PCI type detection for dvb-bt8xx, -which is necessary for TwinHan cards. +which is necessary for TwinHan cards. If you're having an older card (blue color circuit) and card=0x71 locks your machine, try using 0x68, too. If that does not work, ask on the mailing list. -The DST module takes a couple of useful parameters. +The DST module takes a couple of useful parameters: -verbose takes values 0 to 5. These values control the verbosity level. - -debug takes values 0 and 1. You can either disable or enable debugging. - -dst_addons takes values 0 and 0x20. A value of 0 means it is a FTA card. -0x20 means it has a Conditional Access slot. - -The autodected values are determined bythe cards 'response -string' which you can see in your logs e.g. - -dst_get_device_id: Recognise [DSTMCI] +a. verbose takes values 0 to 5. These values control the verbosity level. +b. debug takes values 0 and 1. You can either disable or enable debugging. +c. dst_addons takes values 0 and 0x20: +- A value of 0 means it is a FTA card. +- A value of 0x20 means it has a Conditional Access slot. +The autodetected values are determined by the "response string" +of the card, which you can see in your logs: +e.g.: dst_get_device_id: Recognize [DSTMCI] -- -Authors: Richard Walker, Jamie Honan, Michael Hunold, Manu Abraham +Authors: Richard Walker, Jamie Honan, Michael Hunold, Manu Abraham, Uwe Bugla -- cgit v1.2.3 From 5cdb7b48d0d5963e40bb6621bfa7b2d5fddc4562 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 25 Jun 2005 14:54:22 -0700 Subject: [PATCH] cx88 build fix static declaration of cx88_pci_irqs follows non-static. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/video/cx88/cx88.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h index ac0dc27bb38..867e988a5a9 100644 --- a/drivers/media/video/cx88/cx88.h +++ b/drivers/media/video/cx88/cx88.h @@ -435,7 +435,6 @@ struct cx8802_dev { /* ----------------------------------------------------------- */ /* cx88-core.c */ -extern char *cx88_pci_irqs[32]; extern char *cx88_vid_irqs[32]; extern char *cx88_mpeg_irqs[32]; extern void cx88_print_irqbits(char *name, char *tag, char **strings, -- cgit v1.2.3 From 3f5f7e2eeb539da95157d7fa8c94fb2f3284b9cc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Jun 2005 14:54:22 -0700 Subject: [PATCH] Toshiba driver cleanup Toshiba legacy driver cleanup: - use module_init/module_exit for initialization instead of using #ifdef MODULE and calling tosh_init manually from drivers/char/misc.c - do not explicitly initialize static variables - some whitespace and formatting cleanups Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/misc.c | 4 ---- drivers/char/toshiba.c | 60 ++++++++++++++++++++------------------------------ 2 files changed, 24 insertions(+), 40 deletions(-) diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 3115d318b99..f7e838eae19 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -66,7 +66,6 @@ static unsigned char misc_minors[DYNAMIC_MINORS / 8]; extern int rtc_DP8570A_init(void); extern int rtc_MK48T08_init(void); extern int pmu_device_init(void); -extern int tosh_init(void); extern int i8k_init(void); #ifdef CONFIG_PROC_FS @@ -314,9 +313,6 @@ static int __init misc_init(void) #ifdef CONFIG_PMAC_PBOOK pmu_device_init(); #endif -#ifdef CONFIG_TOSHIBA - tosh_init(); -#endif #ifdef CONFIG_I8K i8k_init(); #endif diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index 58e21fe4426..0c6f521abd0 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -73,16 +73,20 @@ #define TOSH_MINOR_DEV 181 -static int tosh_id = 0x0000; -static int tosh_bios = 0x0000; -static int tosh_date = 0x0000; -static int tosh_sci = 0x0000; -static int tosh_fan = 0; - -static int tosh_fn = 0; +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jonathan Buzzard "); +MODULE_DESCRIPTION("Toshiba laptop SMM driver"); +MODULE_SUPPORTED_DEVICE("toshiba"); -module_param(tosh_fn, int, 0); +static int tosh_fn; +module_param_named(fn, tosh_fn, int, 0); +MODULE_PARM_DESC(fn, "User specified Fn key detection port"); +static int tosh_id; +static int tosh_bios; +static int tosh_date; +static int tosh_sci; +static int tosh_fan; static int tosh_ioctl(struct inode *, struct file *, unsigned int, unsigned long); @@ -359,7 +363,7 @@ static int tosh_get_machine_id(void) unsigned long address; id = (0x100*(int) isa_readb(0xffffe))+((int) isa_readb(0xffffa)); - + /* do we have a SCTTable machine identication number on our hands */ if (id==0xfc2f) { @@ -424,7 +428,7 @@ static int tosh_probe(void) } /* call the Toshiba SCI support check routine */ - + regs.eax = 0xf0f0; regs.ebx = 0x0000; regs.ecx = 0x0000; @@ -440,7 +444,7 @@ static int tosh_probe(void) /* if we get this far then we are running on a Toshiba (probably)! 
*/ tosh_sci = regs.edx & 0xffff; - + /* next get the machine ID of the current laptop */ tosh_id = tosh_get_machine_id(); @@ -475,16 +479,15 @@ static int tosh_probe(void) return 0; } -int __init tosh_init(void) +static int __init toshiba_init(void) { int retval; /* are we running on a Toshiba laptop */ - if (tosh_probe()!=0) - return -EIO; + if (tosh_probe()) + return -ENODEV; - printk(KERN_INFO "Toshiba System Managment Mode driver v" - TOSH_VERSION"\n"); + printk(KERN_INFO "Toshiba System Managment Mode driver v" TOSH_VERSION "\n"); /* set the port to use for Fn status if not specified as a parameter */ if (tosh_fn==0x00) @@ -492,12 +495,12 @@ int __init tosh_init(void) /* register the device file */ retval = misc_register(&tosh_device); - if(retval < 0) + if (retval < 0) return retval; #ifdef CONFIG_PROC_FS /* register the proc entry */ - if(create_proc_info_entry("toshiba", 0, NULL, tosh_get_info) == NULL){ + if (create_proc_info_entry("toshiba", 0, NULL, tosh_get_info) == NULL) { misc_deregister(&tosh_device); return -ENOMEM; } @@ -506,27 +509,12 @@ int __init tosh_init(void) return 0; } -#ifdef MODULE -int init_module(void) -{ - return tosh_init(); -} - -void cleanup_module(void) +static void __exit toshiba_exit(void) { - /* remove the proc entry */ - remove_proc_entry("toshiba", NULL); - - /* unregister the device file */ - misc_deregister(&tosh_device); } -#endif -MODULE_LICENSE("GPL"); -MODULE_PARM_DESC(tosh_fn, "User specified Fn key detection port"); -MODULE_AUTHOR("Jonathan Buzzard "); -MODULE_DESCRIPTION("Toshiba laptop SMM driver"); -MODULE_SUPPORTED_DEVICE("toshiba"); +module_init(toshiba_init); +module_exit(toshiba_exit); -- cgit v1.2.3 From dec63ec32ea486ab915138e8790084c22a3f7bf6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Jun 2005 14:54:23 -0700 Subject: [PATCH] I8K: pass through lindent I8K: pass through Lindent to change 4 spaces identation to TABs Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/i8k.c | 950 ++++++++++++++++++++++++++--------------------------- 1 file changed, 475 insertions(+), 475 deletions(-) diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index a8119764028..bf5e43beca6 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -55,14 +55,14 @@ #define DELL_SIGNATURE "Dell Computer" static char *supported_models[] = { - "Inspiron", - "Latitude", - NULL + "Inspiron", + "Latitude", + NULL }; static char system_vendor[48] = "?"; -static char product_name [48] = "?"; -static char bios_version [4] = "?"; +static char product_name[48] = "?"; +static char bios_version[4] = "?"; static char serial_number[16] = "?"; MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); @@ -86,64 +86,63 @@ static int i8k_ioctl(struct inode *, struct file *, unsigned int, unsigned long); static struct file_operations i8k_fops = { - .read = i8k_read, - .ioctl = i8k_ioctl, + .read = i8k_read, + .ioctl = i8k_ioctl, }; typedef struct { - unsigned int eax; - unsigned int ebx __attribute__ ((packed)); - unsigned int ecx __attribute__ ((packed)); - unsigned int edx __attribute__ ((packed)); - unsigned int esi __attribute__ ((packed)); - unsigned int edi __attribute__ ((packed)); + unsigned int eax; + unsigned int ebx __attribute__ ((packed)); + unsigned int ecx __attribute__ ((packed)); + unsigned int edx __attribute__ ((packed)); + unsigned int esi __attribute__ ((packed)); + unsigned int edi __attribute__ ((packed)); } SMMRegisters; typedef struct { - u8 type; - u8 length; - u16 handle; + u8 type; + u8 
length; + u16 handle; } DMIHeader; /* * Call the System Management Mode BIOS. Code provided by Jonathan Buzzard. */ -static int i8k_smm(SMMRegisters *regs) +static int i8k_smm(SMMRegisters * regs) { - int rc; - int eax = regs->eax; - - asm("pushl %%eax\n\t" \ - "movl 0(%%eax),%%edx\n\t" \ - "push %%edx\n\t" \ - "movl 4(%%eax),%%ebx\n\t" \ - "movl 8(%%eax),%%ecx\n\t" \ - "movl 12(%%eax),%%edx\n\t" \ - "movl 16(%%eax),%%esi\n\t" \ - "movl 20(%%eax),%%edi\n\t" \ - "popl %%eax\n\t" \ - "out %%al,$0xb2\n\t" \ - "out %%al,$0x84\n\t" \ - "xchgl %%eax,(%%esp)\n\t" - "movl %%ebx,4(%%eax)\n\t" \ - "movl %%ecx,8(%%eax)\n\t" \ - "movl %%edx,12(%%eax)\n\t" \ - "movl %%esi,16(%%eax)\n\t" \ - "movl %%edi,20(%%eax)\n\t" \ - "popl %%edx\n\t" \ - "movl %%edx,0(%%eax)\n\t" \ - "lahf\n\t" \ - "shrl $8,%%eax\n\t" \ - "andl $1,%%eax\n" \ - : "=a" (rc) - : "a" (regs) - : "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory"); - - if ((rc != 0) || ((regs->eax & 0xffff) == 0xffff) || (regs->eax == eax)) { - return -EINVAL; - } - - return 0; + int rc; + int eax = regs->eax; + + asm("pushl %%eax\n\t" + "movl 0(%%eax),%%edx\n\t" + "push %%edx\n\t" + "movl 4(%%eax),%%ebx\n\t" + "movl 8(%%eax),%%ecx\n\t" + "movl 12(%%eax),%%edx\n\t" + "movl 16(%%eax),%%esi\n\t" + "movl 20(%%eax),%%edi\n\t" + "popl %%eax\n\t" + "out %%al,$0xb2\n\t" + "out %%al,$0x84\n\t" + "xchgl %%eax,(%%esp)\n\t" + "movl %%ebx,4(%%eax)\n\t" + "movl %%ecx,8(%%eax)\n\t" + "movl %%edx,12(%%eax)\n\t" + "movl %%esi,16(%%eax)\n\t" + "movl %%edi,20(%%eax)\n\t" + "popl %%edx\n\t" + "movl %%edx,0(%%eax)\n\t" + "lahf\n\t" + "shrl $8,%%eax\n\t" + "andl $1,%%eax\n":"=a"(rc) + : "a"(regs) + : "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory"); + + if ((rc != 0) || ((regs->eax & 0xffff) == 0xffff) || (regs->eax == eax)) { + return -EINVAL; + } + + return 0; } /* @@ -152,15 +151,15 @@ static int i8k_smm(SMMRegisters *regs) */ static int i8k_get_bios_version(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; - regs.eax = I8K_SMM_BIOS_VERSION; - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } + regs.eax = I8K_SMM_BIOS_VERSION; + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } - return regs.eax; + return regs.eax; } /* @@ -168,8 +167,8 @@ static int i8k_get_bios_version(void) */ static int i8k_get_serial_number(unsigned char *buff) { - strlcpy(buff, serial_number, sizeof(serial_number)); - return 0; + strlcpy(buff, serial_number, sizeof(serial_number)); + return 0; } /* @@ -177,24 +176,24 @@ static int i8k_get_serial_number(unsigned char *buff) */ static int i8k_get_fn_status(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; - - regs.eax = I8K_SMM_FN_STATUS; - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } - - switch ((regs.eax >> I8K_FN_SHIFT) & I8K_FN_MASK) { - case I8K_FN_UP: - return I8K_VOL_UP; - case I8K_FN_DOWN: - return I8K_VOL_DOWN; - case I8K_FN_MUTE: - return I8K_VOL_MUTE; - default: - return 0; - } + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; + + regs.eax = I8K_SMM_FN_STATUS; + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } + + switch ((regs.eax >> I8K_FN_SHIFT) & I8K_FN_MASK) { + case I8K_FN_UP: + return I8K_VOL_UP; + case I8K_FN_DOWN: + return I8K_VOL_DOWN; + case I8K_FN_MUTE: + return I8K_VOL_MUTE; + default: + return 0; + } } /* @@ -202,20 +201,20 @@ static int i8k_get_fn_status(void) */ static int i8k_get_power_status(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; - - regs.eax = I8K_SMM_POWER_STATUS; - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } - - switch 
(regs.eax & 0xff) { - case I8K_POWER_AC: - return I8K_AC; - default: - return I8K_BATTERY; - } + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; + + regs.eax = I8K_SMM_POWER_STATUS; + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } + + switch (regs.eax & 0xff) { + case I8K_POWER_AC: + return I8K_AC; + default: + return I8K_BATTERY; + } } /* @@ -223,16 +222,16 @@ static int i8k_get_power_status(void) */ static int i8k_get_fan_status(int fan) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; - regs.eax = I8K_SMM_GET_FAN; - regs.ebx = fan & 0xff; - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } + regs.eax = I8K_SMM_GET_FAN; + regs.ebx = fan & 0xff; + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } - return (regs.eax & 0xff); + return (regs.eax & 0xff); } /* @@ -240,16 +239,16 @@ static int i8k_get_fan_status(int fan) */ static int i8k_get_fan_speed(int fan) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; - regs.eax = I8K_SMM_GET_SPEED; - regs.ebx = fan & 0xff; - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } + regs.eax = I8K_SMM_GET_SPEED; + regs.ebx = fan & 0xff; + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } - return (regs.eax & 0xffff) * I8K_FAN_MULT; + return (regs.eax & 0xffff) * I8K_FAN_MULT; } /* @@ -257,18 +256,18 @@ static int i8k_get_fan_speed(int fan) */ static int i8k_set_fan(int fan, int speed) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; - speed = (speed < 0) ? 0 : ((speed > I8K_FAN_MAX) ? I8K_FAN_MAX : speed); + speed = (speed < 0) ? 0 : ((speed > I8K_FAN_MAX) ? I8K_FAN_MAX : speed); - regs.eax = I8K_SMM_SET_FAN; - regs.ebx = (fan & 0xff) | (speed << 8); - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } + regs.eax = I8K_SMM_SET_FAN; + regs.ebx = (fan & 0xff) | (speed << 8); + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } - return (i8k_get_fan_status(fan)); + return (i8k_get_fan_status(fan)); } /* @@ -276,143 +275,143 @@ static int i8k_set_fan(int fan, int speed) */ static int i8k_get_cpu_temp(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; - int temp; + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; + int temp; #ifdef I8K_TEMPERATURE_BUG - static int prev = 0; + static int prev = 0; #endif - regs.eax = I8K_SMM_GET_TEMP; - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } - temp = regs.eax & 0xff; + regs.eax = I8K_SMM_GET_TEMP; + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } + temp = regs.eax & 0xff; #ifdef I8K_TEMPERATURE_BUG - /* - * Sometimes the temperature sensor returns 0x99, which is out of range. - * In this case we return (once) the previous cached value. For example: - # 1003655137 00000058 00005a4b - # 1003655138 00000099 00003a80 <--- 0x99 = 153 degrees - # 1003655139 00000054 00005c52 - */ - if (temp > I8K_MAX_TEMP) { - temp = prev; - prev = I8K_MAX_TEMP; - } else { - prev = temp; - } + /* + * Sometimes the temperature sensor returns 0x99, which is out of range. + * In this case we return (once) the previous cached value. 
For example: + # 1003655137 00000058 00005a4b + # 1003655138 00000099 00003a80 <--- 0x99 = 153 degrees + # 1003655139 00000054 00005c52 + */ + if (temp > I8K_MAX_TEMP) { + temp = prev; + prev = I8K_MAX_TEMP; + } else { + prev = temp; + } #endif - return temp; + return temp; } static int i8k_get_dell_signature(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + int rc; - regs.eax = I8K_SMM_GET_DELL_SIG; - if ((rc=i8k_smm(®s)) < 0) { - return rc; - } + regs.eax = I8K_SMM_GET_DELL_SIG; + if ((rc = i8k_smm(®s)) < 0) { + return rc; + } - if ((regs.eax == 1145651527) && (regs.edx == 1145392204)) { - return 0; - } else { - return -1; - } + if ((regs.eax == 1145651527) && (regs.edx == 1145392204)) { + return 0; + } else { + return -1; + } } static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, unsigned long arg) { - int val; - int speed; - unsigned char buff[16]; - int __user *argp = (int __user *)arg; - - if (!argp) - return -EINVAL; - - switch (cmd) { - case I8K_BIOS_VERSION: - val = i8k_get_bios_version(); - break; - - case I8K_MACHINE_ID: - memset(buff, 0, 16); - val = i8k_get_serial_number(buff); - break; - - case I8K_FN_STATUS: - val = i8k_get_fn_status(); - break; - - case I8K_POWER_STATUS: - val = i8k_get_power_status(); - break; + int val; + int speed; + unsigned char buff[16]; + int __user *argp = (int __user *)arg; + + if (!argp) + return -EINVAL; + + switch (cmd) { + case I8K_BIOS_VERSION: + val = i8k_get_bios_version(); + break; + + case I8K_MACHINE_ID: + memset(buff, 0, 16); + val = i8k_get_serial_number(buff); + break; + + case I8K_FN_STATUS: + val = i8k_get_fn_status(); + break; + + case I8K_POWER_STATUS: + val = i8k_get_power_status(); + break; + + case I8K_GET_TEMP: + val = i8k_get_cpu_temp(); + break; + + case I8K_GET_SPEED: + if (copy_from_user(&val, argp, sizeof(int))) { + return -EFAULT; + } + val = i8k_get_fan_speed(val); + break; - case I8K_GET_TEMP: - val = i8k_get_cpu_temp(); - break; + case I8K_GET_FAN: + if (copy_from_user(&val, argp, sizeof(int))) { + return -EFAULT; + } + val = i8k_get_fan_status(val); + break; - case I8K_GET_SPEED: - if (copy_from_user(&val, argp, sizeof(int))) { - return -EFAULT; - } - val = i8k_get_fan_speed(val); - break; + case I8K_SET_FAN: + if (restricted && !capable(CAP_SYS_ADMIN)) { + return -EPERM; + } + if (copy_from_user(&val, argp, sizeof(int))) { + return -EFAULT; + } + if (copy_from_user(&speed, argp + 1, sizeof(int))) { + return -EFAULT; + } + val = i8k_set_fan(val, speed); + break; - case I8K_GET_FAN: - if (copy_from_user(&val, argp, sizeof(int))) { - return -EFAULT; + default: + return -EINVAL; } - val = i8k_get_fan_status(val); - break; - case I8K_SET_FAN: - if (restricted && !capable(CAP_SYS_ADMIN)) { - return -EPERM; - } - if (copy_from_user(&val, argp, sizeof(int))) { - return -EFAULT; - } - if (copy_from_user(&speed, argp+1, sizeof(int))) { - return -EFAULT; + if (val < 0) { + return val; } - val = i8k_set_fan(val, speed); - break; - default: - return -EINVAL; - } - - if (val < 0) { - return val; - } - - switch (cmd) { - case I8K_BIOS_VERSION: - if (copy_to_user(argp, &val, 4)) { - return -EFAULT; - } - break; - case I8K_MACHINE_ID: - if (copy_to_user(argp, buff, 16)) { - return -EFAULT; - } - break; - default: - if (copy_to_user(argp, &val, sizeof(int))) { - return -EFAULT; + switch (cmd) { + case I8K_BIOS_VERSION: + if (copy_to_user(argp, &val, 4)) { + return -EFAULT; + } + break; + case I8K_MACHINE_ID: + if (copy_to_user(argp, buff, 16)) { + return 
-EFAULT; + } + break; + default: + if (copy_to_user(argp, &val, sizeof(int))) { + return -EFAULT; + } + break; } - break; - } - return 0; + return 0; } /* @@ -420,90 +419,87 @@ static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, */ static int i8k_get_info(char *buffer, char **start, off_t fpos, int length) { - int n, fn_key, cpu_temp, ac_power; - int left_fan, right_fan, left_speed, right_speed; - - cpu_temp = i8k_get_cpu_temp(); /* 11100 µs */ - left_fan = i8k_get_fan_status(I8K_FAN_LEFT); /* 580 µs */ - right_fan = i8k_get_fan_status(I8K_FAN_RIGHT); /* 580 µs */ - left_speed = i8k_get_fan_speed(I8K_FAN_LEFT); /* 580 µs */ - right_speed = i8k_get_fan_speed(I8K_FAN_RIGHT); /* 580 µs */ - fn_key = i8k_get_fn_status(); /* 750 µs */ - if (power_status) { - ac_power = i8k_get_power_status(); /* 14700 µs */ - } else { - ac_power = -1; - } - - /* - * Info: - * - * 1) Format version (this will change if format changes) - * 2) BIOS version - * 3) BIOS machine ID - * 4) Cpu temperature - * 5) Left fan status - * 6) Right fan status - * 7) Left fan speed - * 8) Right fan speed - * 9) AC power - * 10) Fn Key status - */ - n = sprintf(buffer, "%s %s %s %d %d %d %d %d %d %d\n", - I8K_PROC_FMT, - bios_version, - serial_number, - cpu_temp, - left_fan, - right_fan, - left_speed, - right_speed, - ac_power, - fn_key); - - return n; + int n, fn_key, cpu_temp, ac_power; + int left_fan, right_fan, left_speed, right_speed; + + cpu_temp = i8k_get_cpu_temp(); /* 11100 µs */ + left_fan = i8k_get_fan_status(I8K_FAN_LEFT); /* 580 µs */ + right_fan = i8k_get_fan_status(I8K_FAN_RIGHT); /* 580 µs */ + left_speed = i8k_get_fan_speed(I8K_FAN_LEFT); /* 580 µs */ + right_speed = i8k_get_fan_speed(I8K_FAN_RIGHT); /* 580 µs */ + fn_key = i8k_get_fn_status(); /* 750 µs */ + if (power_status) { + ac_power = i8k_get_power_status(); /* 14700 µs */ + } else { + ac_power = -1; + } + + /* + * Info: + * + * 1) Format version (this will change if format changes) + * 2) BIOS version + * 3) BIOS machine ID + * 4) Cpu temperature + * 5) Left fan status + * 6) Right fan status + * 7) Left fan speed + * 8) Right fan speed + * 9) AC power + * 10) Fn Key status + */ + n = sprintf(buffer, "%s %s %s %d %d %d %d %d %d %d\n", + I8K_PROC_FMT, + bios_version, + serial_number, + cpu_temp, + left_fan, + right_fan, left_speed, right_speed, ac_power, fn_key); + + return n; } -static ssize_t i8k_read(struct file *f, char __user *buffer, size_t len, loff_t *fpos) +static ssize_t i8k_read(struct file *f, char __user * buffer, size_t len, + loff_t * fpos) { - int n; - char info[128]; + int n; + char info[128]; - n = i8k_get_info(info, NULL, 0, 128); - if (n <= 0) { - return n; - } + n = i8k_get_info(info, NULL, 0, 128); + if (n <= 0) { + return n; + } - if (*fpos >= n) { - return 0; - } + if (*fpos >= n) { + return 0; + } - if ((*fpos + len) >= n) { - len = n - *fpos; - } + if ((*fpos + len) >= n) { + len = n - *fpos; + } - if (copy_to_user(buffer, info, len) != 0) { - return -EFAULT; - } + if (copy_to_user(buffer, info, len) != 0) { + return -EFAULT; + } - *fpos += len; - return len; + *fpos += len; + return len; } -static char* __init string_trim(char *s, int size) +static char *__init string_trim(char *s, int size) { - int len; - char *p; + int len; + char *p; - if ((len = strlen(s)) > size) { - len = size; - } + if ((len = strlen(s)) > size) { + len = size; + } - for (p=s+len-1; len && (*p==' '); len--,p--) { - *p = '\0'; - } + for (p = s + len - 1; len && (*p == ' '); len--, p--) { + *p = '\0'; + } - return s; + return s; } /* 
DMI code, stolen from arch/i386/kernel/dmi_scan.c */ @@ -515,111 +511,112 @@ static char* __init string_trim(char *s, int size) * | | * +-----------------------+ */ -static char* __init dmi_string(DMIHeader *dmi, u8 s) +static char *__init dmi_string(DMIHeader * dmi, u8 s) { - u8 *p; - - if (!s) { - return ""; - } - s--; + u8 *p; - p = (u8 *)dmi + dmi->length; - while (s > 0) { - p += strlen(p); - p++; + if (!s) { + return ""; + } s--; - } - return p; + p = (u8 *) dmi + dmi->length; + while (s > 0) { + p += strlen(p); + p++; + s--; + } + + return p; } -static void __init dmi_decode(DMIHeader *dmi) +static void __init dmi_decode(DMIHeader * dmi) { - u8 *data = (u8 *) dmi; - char *p; + u8 *data = (u8 *) dmi; + char *p; #ifdef I8K_DEBUG - int i; - printk("%08x ", (int)data); - for (i=0; itype) { - case 0: /* BIOS Information */ - p = dmi_string(dmi,data[5]); - if (*p) { - strlcpy(bios_version, p, sizeof(bios_version)); - string_trim(bios_version, sizeof(bios_version)); - } - break; - case 1: /* System Information */ - p = dmi_string(dmi,data[4]); - if (*p) { - strlcpy(system_vendor, p, sizeof(system_vendor)); - string_trim(system_vendor, sizeof(system_vendor)); - } - p = dmi_string(dmi,data[5]); - if (*p) { - strlcpy(product_name, p, sizeof(product_name)); - string_trim(product_name, sizeof(product_name)); - } - p = dmi_string(dmi,data[7]); - if (*p) { - strlcpy(serial_number, p, sizeof(serial_number)); - string_trim(serial_number, sizeof(serial_number)); - } - break; - } + switch (dmi->type) { + case 0: /* BIOS Information */ + p = dmi_string(dmi, data[5]); + if (*p) { + strlcpy(bios_version, p, sizeof(bios_version)); + string_trim(bios_version, sizeof(bios_version)); + } + break; + case 1: /* System Information */ + p = dmi_string(dmi, data[4]); + if (*p) { + strlcpy(system_vendor, p, sizeof(system_vendor)); + string_trim(system_vendor, sizeof(system_vendor)); + } + p = dmi_string(dmi, data[5]); + if (*p) { + strlcpy(product_name, p, sizeof(product_name)); + string_trim(product_name, sizeof(product_name)); + } + p = dmi_string(dmi, data[7]); + if (*p) { + strlcpy(serial_number, p, sizeof(serial_number)); + string_trim(serial_number, sizeof(serial_number)); + } + break; + } } -static int __init dmi_table(u32 base, int len, int num, void (*fn)(DMIHeader*)) +static int __init dmi_table(u32 base, int len, int num, + void (*fn) (DMIHeader *)) { - u8 *buf; - u8 *data; - DMIHeader *dmi; - int i = 1; + u8 *buf; + u8 *data; + DMIHeader *dmi; + int i = 1; - buf = ioremap(base, len); - if (buf == NULL) { - return -1; - } - data = buf; - - /* - * Stop when we see al the items the table claimed to have - * or we run off the end of the table (also happens) - */ - while ((ilength) >= len) { - break; + buf = ioremap(base, len); + if (buf == NULL) { + return -1; } - fn(dmi); - data += dmi->length; + data = buf; + /* - * Don't go off the end of the data if there is - * stuff looking like string fill past the end + * Stop when we see al the items the table claimed to have + * or we run off the end of the table (also happens) */ - while (((data-buf) < len) && (*data || data[1])) { - data++; + while ((i < num) && ((data - buf) < len)) { + dmi = (DMIHeader *) data; + /* + * Avoid misparsing crud if the length of the last + * record is crap + */ + if ((data - buf + dmi->length) >= len) { + break; + } + fn(dmi); + data += dmi->length; + /* + * Don't go off the end of the data if there is + * stuff looking like string fill past the end + */ + while (((data - buf) < len) && (*data || data[1])) { + data++; + } + data 
+= 2; + i++; } - data += 2; - i++; - } - iounmap(buf); + iounmap(buf); - return 0; + return 0; } -static int __init dmi_iterate(void (*decode)(DMIHeader *)) +static int __init dmi_iterate(void (*decode) (DMIHeader *)) { unsigned char buf[20]; void __iomem *p = ioremap(0xe0000, 0x20000), *q; @@ -629,20 +626,20 @@ static int __init dmi_iterate(void (*decode)(DMIHeader *)) for (q = p; q < p + 0x20000; q += 16) { memcpy_fromio(buf, q, 20); - if (memcmp(buf, "_DMI_", 5)==0) { - u16 num = buf[13]<<8 | buf[12]; - u16 len = buf [7]<<8 | buf [6]; - u32 base = buf[11]<<24 | buf[10]<<16 | buf[9]<<8 | buf[8]; + if (memcmp(buf, "_DMI_", 5) == 0) { + u16 num = buf[13] << 8 | buf[12]; + u16 len = buf[7] << 8 | buf[6]; + u32 base = buf[11] << 24 | buf[10] << 16 | buf[9] << 8 | buf[8]; #ifdef I8K_DEBUG printk(KERN_INFO "DMI %d.%d present.\n", - buf[14]>>4, buf[14]&0x0F); + buf[14] >> 4, buf[14] & 0x0F); printk(KERN_INFO "%d structures occupying %d bytes.\n", - buf[13]<<8 | buf[12], - buf [7]<<8 | buf[6]); + buf[13] << 8 | buf[12], buf[7] << 8 | buf[6]); printk(KERN_INFO "DMI table at 0x%08X.\n", - buf[11]<<24 | buf[10]<<16 | buf[9]<<8 | buf[8]); + buf[11] << 24 | buf[10] << 16 | buf[9] << 8 | + buf[8]); #endif - if (dmi_table(base, len, num, decode)==0) { + if (dmi_table(base, len, num, decode) == 0) { iounmap(p); return 0; } @@ -651,6 +648,7 @@ static int __init dmi_iterate(void (*decode)(DMIHeader *)) iounmap(p); return -1; } + /* end of DMI code */ /* @@ -658,29 +656,30 @@ static int __init dmi_iterate(void (*decode)(DMIHeader *)) */ static int __init i8k_dmi_probe(void) { - char **p; + char **p; - if (dmi_iterate(dmi_decode) != 0) { - printk(KERN_INFO "i8k: unable to get DMI information\n"); - return -ENODEV; - } - - if (strncmp(system_vendor,DELL_SIGNATURE,strlen(DELL_SIGNATURE)) != 0) { - printk(KERN_INFO "i8k: not running on a Dell system\n"); - return -ENODEV; - } + if (dmi_iterate(dmi_decode) != 0) { + printk(KERN_INFO "i8k: unable to get DMI information\n"); + return -ENODEV; + } - for (p=supported_models; ; p++) { - if (!*p) { - printk(KERN_INFO "i8k: unsupported model: %s\n", product_name); - return -ENODEV; + if (strncmp(system_vendor, DELL_SIGNATURE, strlen(DELL_SIGNATURE)) != 0) { + printk(KERN_INFO "i8k: not running on a Dell system\n"); + return -ENODEV; } - if (strncmp(product_name,*p,strlen(*p)) == 0) { - break; + + for (p = supported_models;; p++) { + if (!*p) { + printk(KERN_INFO "i8k: unsupported model: %s\n", + product_name); + return -ENODEV; + } + if (strncmp(product_name, *p, strlen(*p)) == 0) { + break; + } } - } - return 0; + return 0; } /* @@ -688,59 +687,60 @@ static int __init i8k_dmi_probe(void) */ static int __init i8k_probe(void) { - char buff[4]; - int version; - int smm_found = 0; - - /* - * Get DMI information - */ - if (i8k_dmi_probe() != 0) { - printk(KERN_INFO "i8k: vendor=%s, model=%s, version=%s\n", - system_vendor, product_name, bios_version); - } - - /* - * Get SMM Dell signature - */ - if (i8k_get_dell_signature() != 0) { - printk(KERN_INFO "i8k: unable to get SMM Dell signature\n"); - } else { - smm_found = 1; - } - - /* - * Get SMM BIOS version. - */ - version = i8k_get_bios_version(); - if (version <= 0) { - printk(KERN_INFO "i8k: unable to get SMM BIOS version\n"); - } else { - smm_found = 1; - buff[0] = (version >> 16) & 0xff; - buff[1] = (version >> 8) & 0xff; - buff[2] = (version) & 0xff; - buff[3] = '\0'; + char buff[4]; + int version; + int smm_found = 0; + /* - * If DMI BIOS version is unknown use SMM BIOS version. 
+ * Get DMI information */ - if (bios_version[0] == '?') { - strcpy(bios_version, buff); + if (i8k_dmi_probe() != 0) { + printk(KERN_INFO "i8k: vendor=%s, model=%s, version=%s\n", + system_vendor, product_name, bios_version); } + /* - * Check if the two versions match. + * Get SMM Dell signature */ - if (strncmp(buff,bios_version,sizeof(bios_version)) != 0) { - printk(KERN_INFO "i8k: BIOS version mismatch: %s != %s\n", - buff, bios_version); + if (i8k_get_dell_signature() != 0) { + printk(KERN_INFO "i8k: unable to get SMM Dell signature\n"); + } else { + smm_found = 1; } - } - if (!smm_found && !force) { - return -ENODEV; - } + /* + * Get SMM BIOS version. + */ + version = i8k_get_bios_version(); + if (version <= 0) { + printk(KERN_INFO "i8k: unable to get SMM BIOS version\n"); + } else { + smm_found = 1; + buff[0] = (version >> 16) & 0xff; + buff[1] = (version >> 8) & 0xff; + buff[2] = (version) & 0xff; + buff[3] = '\0'; + /* + * If DMI BIOS version is unknown use SMM BIOS version. + */ + if (bios_version[0] == '?') { + strcpy(bios_version, buff); + } + /* + * Check if the two versions match. + */ + if (strncmp(buff, bios_version, sizeof(bios_version)) != 0) { + printk(KERN_INFO + "i8k: BIOS version mismatch: %s != %s\n", buff, + bios_version); + } + } + + if (!smm_found && !force) { + return -ENODEV; + } - return 0; + return 0; } #ifdef MODULE @@ -748,40 +748,40 @@ static #endif int __init i8k_init(void) { - struct proc_dir_entry *proc_i8k; - - /* Are we running on an supported laptop? */ - if (i8k_probe() != 0) { - return -ENODEV; - } - - /* Register the proc entry */ - proc_i8k = create_proc_info_entry("i8k", 0, NULL, i8k_get_info); - if (!proc_i8k) { - return -ENOENT; - } - proc_i8k->proc_fops = &i8k_fops; - proc_i8k->owner = THIS_MODULE; - - printk(KERN_INFO - "Dell laptop SMM driver v%s Massimo Dal Zotto (dz@debian.org)\n", - I8K_VERSION); - - return 0; + struct proc_dir_entry *proc_i8k; + + /* Are we running on an supported laptop? */ + if (i8k_probe() != 0) { + return -ENODEV; + } + + /* Register the proc entry */ + proc_i8k = create_proc_info_entry("i8k", 0, NULL, i8k_get_info); + if (!proc_i8k) { + return -ENOENT; + } + proc_i8k->proc_fops = &i8k_fops; + proc_i8k->owner = THIS_MODULE; + + printk(KERN_INFO + "Dell laptop SMM driver v%s Massimo Dal Zotto (dz@debian.org)\n", + I8K_VERSION); + + return 0; } #ifdef MODULE int init_module(void) { - return i8k_init(); + return i8k_init(); } void cleanup_module(void) { - /* Remove the proc entry */ - remove_proc_entry("i8k", NULL); + /* Remove the proc entry */ + remove_proc_entry("i8k", NULL); - printk(KERN_INFO "i8k: module unloaded\n"); + printk(KERN_INFO "i8k: module unloaded\n"); } #endif -- cgit v1.2.3 From e70c9d5e61c6cb2272c866fc1303e62975006752 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Jun 2005 14:54:25 -0700 Subject: [PATCH] I8K: use standard DMI interface I8K: Change to use stock dmi infrastructure instead of homegrown parsing code. The driver now requires box's DMI data to match list of supported models so driver can be safely compiled-in by default without fear of it poking into random SMM BIOS code. DMI checks can be ignored with i8k.ignore_dmi option. 
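In rough outline, the new probe path reduces to a single DMI guard with two user-controlled escape hatches. The sketch below only mirrors the identifiers this patch introduces (the full match table and log messages are in the diff that follows):

	/* refuse to poke the SMM BIOS unless DMI matches a known Dell
	 * model, or the user explicitly overrides the check */
	if (!dmi_check_system(i8k_dmi_table) && !ignore_dmi && !force)
		return -ENODEV;

Loading the driver with "modprobe i8k ignore_dmi=1" (or booting with i8k.ignore_dmi on the kernel command line) therefore still probes machines whose DMI strings are not in the table.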
Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 + arch/i386/kernel/dmi_scan.c | 6 +- drivers/char/i8k.c | 302 +++++++----------------------------- include/linux/dmi.h | 1 + 4 files changed, 60 insertions(+), 252 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4924d387a65..86db43fd6b0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -548,6 +548,9 @@ running once the system is up. i810= [HW,DRM] + i8k.ignore_dmi [HW] Continue probing hardware even if DMI data + indicates that the driver is running on unsupported + hardware. i8k.force [HW] Activate i8k driver even if SMM BIOS signature does not match list of supported models. i8k.power_status diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 6ed7e28f306..3facd20212b 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -414,6 +414,7 @@ static void __init dmi_decode(struct dmi_header *dm) dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); + dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); break; case 2: dmi_printk(("Board Vendor: %s\n", @@ -470,7 +471,6 @@ fail: d++; return count; } - EXPORT_SYMBOL(dmi_check_system); /** @@ -480,8 +480,8 @@ EXPORT_SYMBOL(dmi_check_system); * Returns one DMI data value, can be used to perform * complex DMI data checks. */ -char * dmi_get_system_info(int field) +char *dmi_get_system_info(int field) { return dmi_ident[field]; } - +EXPORT_SYMBOL(dmi_get_system_info); diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index bf5e43beca6..81d2f675fb7 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -52,18 +52,7 @@ #define I8K_TEMPERATURE_BUG 1 -#define DELL_SIGNATURE "Dell Computer" - -static char *supported_models[] = { - "Inspiron", - "Latitude", - NULL -}; - -static char system_vendor[48] = "?"; -static char product_name[48] = "?"; -static char bios_version[4] = "?"; -static char serial_number[16] = "?"; +static char bios_version[4]; MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); MODULE_DESCRIPTION("Driver for accessing SMM BIOS on Dell laptops"); @@ -73,6 +62,10 @@ static int force; module_param(force, bool, 0); MODULE_PARM_DESC(force, "Force loading without checking for supported models"); +static int ignore_dmi; +module_param(ignore_dmi, bool, 0); +MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match"); + static int restricted; module_param(restricted, bool, 0); MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set"); @@ -99,11 +92,10 @@ typedef struct { unsigned int edi __attribute__ ((packed)); } SMMRegisters; -typedef struct { - u8 type; - u8 length; - u16 handle; -} DMIHeader; +static inline char *i8k_get_dmi_data(int field) +{ + return dmi_get_system_info(field) ? : "N/A"; +} /* * Call the System Management Mode BIOS. Code provided by Jonathan Buzzard. @@ -162,15 +154,6 @@ static int i8k_get_bios_version(void) return regs.eax; } -/* - * Read the machine id. - */ -static int i8k_get_serial_number(unsigned char *buff) -{ - strlcpy(buff, serial_number, sizeof(serial_number)); - return 0; -} - /* * Read the Fn key status. 
*/ @@ -328,7 +311,7 @@ static int i8k_get_dell_signature(void) static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, unsigned long arg) { - int val; + int val = 0; int speed; unsigned char buff[16]; int __user *argp = (int __user *)arg; @@ -343,7 +326,7 @@ static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, case I8K_MACHINE_ID: memset(buff, 0, 16); - val = i8k_get_serial_number(buff); + strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL), sizeof(buff)); break; case I8K_FN_STATUS: @@ -451,10 +434,10 @@ static int i8k_get_info(char *buffer, char **start, off_t fpos, int length) n = sprintf(buffer, "%s %s %s %d %d %d %d %d %d %d\n", I8K_PROC_FMT, bios_version, - serial_number, + dmi_get_system_info(DMI_PRODUCT_SERIAL) ? : "N/A", cpu_temp, - left_fan, - right_fan, left_speed, right_speed, ac_power, fn_key); + left_fan, right_fan, left_speed, right_speed, + ac_power, fn_key); return n; } @@ -486,201 +469,23 @@ static ssize_t i8k_read(struct file *f, char __user * buffer, size_t len, return len; } -static char *__init string_trim(char *s, int size) -{ - int len; - char *p; - - if ((len = strlen(s)) > size) { - len = size; - } - - for (p = s + len - 1; len && (*p == ' '); len--, p--) { - *p = '\0'; - } - - return s; -} - -/* DMI code, stolen from arch/i386/kernel/dmi_scan.c */ - -/* - * |<-- dmi->length -->| - * | | - * |dmi header s=N | string1,\0, ..., stringN,\0, ..., \0 - * | | - * +-----------------------+ - */ -static char *__init dmi_string(DMIHeader * dmi, u8 s) -{ - u8 *p; - - if (!s) { - return ""; - } - s--; - - p = (u8 *) dmi + dmi->length; - while (s > 0) { - p += strlen(p); - p++; - s--; - } - - return p; -} - -static void __init dmi_decode(DMIHeader * dmi) -{ - u8 *data = (u8 *) dmi; - char *p; - -#ifdef I8K_DEBUG - int i; - printk("%08x ", (int)data); - for (i = 0; i < data[1] && i < 64; i++) { - printk("%02x ", data[i]); - } - printk("\n"); -#endif - - switch (dmi->type) { - case 0: /* BIOS Information */ - p = dmi_string(dmi, data[5]); - if (*p) { - strlcpy(bios_version, p, sizeof(bios_version)); - string_trim(bios_version, sizeof(bios_version)); - } - break; - case 1: /* System Information */ - p = dmi_string(dmi, data[4]); - if (*p) { - strlcpy(system_vendor, p, sizeof(system_vendor)); - string_trim(system_vendor, sizeof(system_vendor)); - } - p = dmi_string(dmi, data[5]); - if (*p) { - strlcpy(product_name, p, sizeof(product_name)); - string_trim(product_name, sizeof(product_name)); - } - p = dmi_string(dmi, data[7]); - if (*p) { - strlcpy(serial_number, p, sizeof(serial_number)); - string_trim(serial_number, sizeof(serial_number)); - } - break; - } -} - -static int __init dmi_table(u32 base, int len, int num, - void (*fn) (DMIHeader *)) -{ - u8 *buf; - u8 *data; - DMIHeader *dmi; - int i = 1; - - buf = ioremap(base, len); - if (buf == NULL) { - return -1; - } - data = buf; - - /* - * Stop when we see al the items the table claimed to have - * or we run off the end of the table (also happens) - */ - while ((i < num) && ((data - buf) < len)) { - dmi = (DMIHeader *) data; - /* - * Avoid misparsing crud if the length of the last - * record is crap - */ - if ((data - buf + dmi->length) >= len) { - break; - } - fn(dmi); - data += dmi->length; - /* - * Don't go off the end of the data if there is - * stuff looking like string fill past the end - */ - while (((data - buf) < len) && (*data || data[1])) { - data++; - } - data += 2; - i++; - } - iounmap(buf); - - return 0; -} - -static int __init dmi_iterate(void (*decode) (DMIHeader *)) -{ - 
unsigned char buf[20]; - void __iomem *p = ioremap(0xe0000, 0x20000), *q; - - if (!p) - return -1; - - for (q = p; q < p + 0x20000; q += 16) { - memcpy_fromio(buf, q, 20); - if (memcmp(buf, "_DMI_", 5) == 0) { - u16 num = buf[13] << 8 | buf[12]; - u16 len = buf[7] << 8 | buf[6]; - u32 base = buf[11] << 24 | buf[10] << 16 | buf[9] << 8 | buf[8]; -#ifdef I8K_DEBUG - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0x0F); - printk(KERN_INFO "%d structures occupying %d bytes.\n", - buf[13] << 8 | buf[12], buf[7] << 8 | buf[6]); - printk(KERN_INFO "DMI table at 0x%08X.\n", - buf[11] << 24 | buf[10] << 16 | buf[9] << 8 | - buf[8]); -#endif - if (dmi_table(base, len, num, decode) == 0) { - iounmap(p); - return 0; - } - } - } - iounmap(p); - return -1; -} - -/* end of DMI code */ - -/* - * Get DMI information. - */ -static int __init i8k_dmi_probe(void) -{ - char **p; - - if (dmi_iterate(dmi_decode) != 0) { - printk(KERN_INFO "i8k: unable to get DMI information\n"); - return -ENODEV; - } - - if (strncmp(system_vendor, DELL_SIGNATURE, strlen(DELL_SIGNATURE)) != 0) { - printk(KERN_INFO "i8k: not running on a Dell system\n"); - return -ENODEV; - } - - for (p = supported_models;; p++) { - if (!*p) { - printk(KERN_INFO "i8k: unsupported model: %s\n", - product_name); - return -ENODEV; - } - if (strncmp(product_name, *p, strlen(*p)) == 0) { - break; - } - } - - return 0; -} +static struct dmi_system_id __initdata i8k_dmi_table[] = { + { + .ident = "Dell Inspiron", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron"), + }, + }, + { + .ident = "Dell Latitude", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude"), + }, + }, + { } +}; /* * Probe for the presence of a supported laptop. @@ -689,23 +494,30 @@ static int __init i8k_probe(void) { char buff[4]; int version; - int smm_found = 0; /* * Get DMI information */ - if (i8k_dmi_probe() != 0) { + if (!dmi_check_system(i8k_dmi_table)) { + if (!ignore_dmi && !force) + return -ENODEV; + + printk(KERN_INFO "i8k: not running on a supported Dell system.\n"); printk(KERN_INFO "i8k: vendor=%s, model=%s, version=%s\n", - system_vendor, product_name, bios_version); + i8k_get_dmi_data(DMI_SYS_VENDOR), + i8k_get_dmi_data(DMI_PRODUCT_NAME), + i8k_get_dmi_data(DMI_BIOS_VERSION)); } + strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version)); + /* * Get SMM Dell signature */ if (i8k_get_dell_signature() != 0) { - printk(KERN_INFO "i8k: unable to get SMM Dell signature\n"); - } else { - smm_found = 1; + printk(KERN_ERR "i8k: unable to get SMM Dell signature\n"); + if (!force) + return -ENODEV; } /* @@ -713,9 +525,8 @@ static int __init i8k_probe(void) */ version = i8k_get_bios_version(); if (version <= 0) { - printk(KERN_INFO "i8k: unable to get SMM BIOS version\n"); + printk(KERN_WARNING "i8k: unable to get SMM BIOS version\n"); } else { - smm_found = 1; buff[0] = (version >> 16) & 0xff; buff[1] = (version >> 8) & 0xff; buff[2] = (version) & 0xff; @@ -723,21 +534,15 @@ static int __init i8k_probe(void) /* * If DMI BIOS version is unknown use SMM BIOS version. */ - if (bios_version[0] == '?') { - strcpy(bios_version, buff); - } + if (!dmi_get_system_info(DMI_BIOS_VERSION)) + strlcpy(bios_version, buff, sizeof(bios_version)); + /* * Check if the two versions match. 
*/ - if (strncmp(buff, bios_version, sizeof(bios_version)) != 0) { - printk(KERN_INFO - "i8k: BIOS version mismatch: %s != %s\n", buff, - bios_version); - } - } - - if (!smm_found && !force) { - return -ENODEV; + if (strncmp(buff, bios_version, sizeof(bios_version)) != 0) + printk(KERN_WARNING "i8k: BIOS version mismatch: %s != %s\n", + buff, bios_version); } return 0; @@ -751,9 +556,8 @@ int __init i8k_init(void) struct proc_dir_entry *proc_i8k; /* Are we running on an supported laptop? */ - if (i8k_probe() != 0) { + if (i8k_probe()) return -ENODEV; - } /* Register the proc entry */ proc_i8k = create_proc_info_entry("i8k", 0, NULL, i8k_get_info); diff --git a/include/linux/dmi.h b/include/linux/dmi.h index d2bcf556088..5e93e6dce9a 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -9,6 +9,7 @@ enum dmi_field { DMI_SYS_VENDOR, DMI_PRODUCT_NAME, DMI_PRODUCT_VERSION, + DMI_PRODUCT_SERIAL, DMI_BOARD_VENDOR, DMI_BOARD_NAME, DMI_BOARD_VERSION, -- cgit v1.2.3 From 352f8f8bfbfb401c8af4c685beaafeb95c27fdd1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Jun 2005 14:54:26 -0700 Subject: [PATCH] I8K: convert to seqfile I8K: Change proc code to use seq_file. Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/i8k.c | 64 +++++++++++++++++++----------------------------------- 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index 81d2f675fb7..1599456bdb6 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -20,13 +20,14 @@ #include #include #include +#include #include #include #include #include -#define I8K_VERSION "1.13 14/05/2002" +#define I8K_VERSION "1.14 21/02/2005" #define I8K_SMM_FN_STATUS 0x0025 #define I8K_SMM_POWER_STATUS 0x0069 @@ -74,13 +75,16 @@ static int power_status; module_param(power_status, bool, 0600); MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k"); -static ssize_t i8k_read(struct file *, char __user *, size_t, loff_t *); +static int i8k_open_fs(struct inode *inode, struct file *file); static int i8k_ioctl(struct inode *, struct file *, unsigned int, unsigned long); static struct file_operations i8k_fops = { - .read = i8k_read, - .ioctl = i8k_ioctl, + .open = i8k_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .ioctl = i8k_ioctl, }; typedef struct { @@ -400,9 +404,9 @@ static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, /* * Print the information for /proc/i8k. */ -static int i8k_get_info(char *buffer, char **start, off_t fpos, int length) +static int i8k_proc_show(struct seq_file *seq, void *offset) { - int n, fn_key, cpu_temp, ac_power; + int fn_key, cpu_temp, ac_power; int left_fan, right_fan, left_speed, right_speed; cpu_temp = i8k_get_cpu_temp(); /* 11100 µs */ @@ -431,42 +435,18 @@ static int i8k_get_info(char *buffer, char **start, off_t fpos, int length) * 9) AC power * 10) Fn Key status */ - n = sprintf(buffer, "%s %s %s %d %d %d %d %d %d %d\n", - I8K_PROC_FMT, - bios_version, - dmi_get_system_info(DMI_PRODUCT_SERIAL) ? : "N/A", - cpu_temp, - left_fan, right_fan, left_speed, right_speed, - ac_power, fn_key); - - return n; + return seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n", + I8K_PROC_FMT, + bios_version, + dmi_get_system_info(DMI_PRODUCT_SERIAL) ? 
: "N/A", + cpu_temp, + left_fan, right_fan, left_speed, right_speed, + ac_power, fn_key); } -static ssize_t i8k_read(struct file *f, char __user * buffer, size_t len, - loff_t * fpos) +static int i8k_open_fs(struct inode *inode, struct file *file) { - int n; - char info[128]; - - n = i8k_get_info(info, NULL, 0, 128); - if (n <= 0) { - return n; - } - - if (*fpos >= n) { - return 0; - } - - if ((*fpos + len) >= n) { - len = n - *fpos; - } - - if (copy_to_user(buffer, info, len) != 0) { - return -EFAULT; - } - - *fpos += len; - return len; + return single_open(file, i8k_proc_show, NULL); } static struct dmi_system_id __initdata i8k_dmi_table[] = { @@ -560,10 +540,10 @@ int __init i8k_init(void) return -ENODEV; /* Register the proc entry */ - proc_i8k = create_proc_info_entry("i8k", 0, NULL, i8k_get_info); - if (!proc_i8k) { + proc_i8k = create_proc_entry("i8k", 0, NULL); + if (!proc_i8k) return -ENOENT; - } + proc_i8k->proc_fops = &i8k_fops; proc_i8k->owner = THIS_MODULE; -- cgit v1.2.3 From 8378b92405dd606c6f3a0b1e303b67c8f8c9f743 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Jun 2005 14:54:27 -0700 Subject: [PATCH] I8K: initialization code cleanup; formatting I8K: use module_{init|exit} instead of old style #ifdef MODULE code, some formatting changes. Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/i8k.c | 149 +++++++++++++++++----------------------------------- drivers/char/misc.c | 4 -- 2 files changed, 47 insertions(+), 106 deletions(-) diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index 1599456bdb6..584e9a4a6f1 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -87,14 +87,14 @@ static struct file_operations i8k_fops = { .ioctl = i8k_ioctl, }; -typedef struct { +struct smm_regs { unsigned int eax; unsigned int ebx __attribute__ ((packed)); unsigned int ecx __attribute__ ((packed)); unsigned int edx __attribute__ ((packed)); unsigned int esi __attribute__ ((packed)); unsigned int edi __attribute__ ((packed)); -} SMMRegisters; +}; static inline char *i8k_get_dmi_data(int field) { @@ -104,7 +104,7 @@ static inline char *i8k_get_dmi_data(int field) /* * Call the System Management Mode BIOS. Code provided by Jonathan Buzzard. */ -static int i8k_smm(SMMRegisters * regs) +static int i8k_smm(struct smm_regs *regs) { int rc; int eax = regs->eax; @@ -134,9 +134,8 @@ static int i8k_smm(SMMRegisters * regs) : "a"(regs) : "%ebx", "%ecx", "%edx", "%esi", "%edi", "memory"); - if ((rc != 0) || ((regs->eax & 0xffff) == 0xffff) || (regs->eax == eax)) { + if (rc != 0 || (regs->eax & 0xffff) == 0xffff || regs->eax == eax) return -EINVAL; - } return 0; } @@ -147,15 +146,9 @@ static int i8k_smm(SMMRegisters * regs) */ static int i8k_get_bios_version(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; - - regs.eax = I8K_SMM_BIOS_VERSION; - if ((rc = i8k_smm(®s)) < 0) { - return rc; - } + struct smm_regs regs = { .eax = I8K_SMM_BIOS_VERSION, }; - return regs.eax; + return i8k_smm(®s) ? 
: regs.eax; } /* @@ -163,13 +156,11 @@ static int i8k_get_bios_version(void) */ static int i8k_get_fn_status(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + struct smm_regs regs = { .eax = I8K_SMM_FN_STATUS, }; int rc; - regs.eax = I8K_SMM_FN_STATUS; - if ((rc = i8k_smm(®s)) < 0) { + if ((rc = i8k_smm(®s)) < 0) return rc; - } switch ((regs.eax >> I8K_FN_SHIFT) & I8K_FN_MASK) { case I8K_FN_UP: @@ -188,20 +179,13 @@ static int i8k_get_fn_status(void) */ static int i8k_get_power_status(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + struct smm_regs regs = { .eax = I8K_SMM_POWER_STATUS, }; int rc; - regs.eax = I8K_SMM_POWER_STATUS; - if ((rc = i8k_smm(®s)) < 0) { + if ((rc = i8k_smm(®s)) < 0) return rc; - } - switch (regs.eax & 0xff) { - case I8K_POWER_AC: - return I8K_AC; - default: - return I8K_BATTERY; - } + return (regs.eax & 0xff) == I8K_POWER_AC ? I8K_AC : I8K_BATTERY; } /* @@ -209,16 +193,10 @@ static int i8k_get_power_status(void) */ static int i8k_get_fan_status(int fan) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + struct smm_regs regs = { .eax = I8K_SMM_GET_FAN, }; - regs.eax = I8K_SMM_GET_FAN; regs.ebx = fan & 0xff; - if ((rc = i8k_smm(®s)) < 0) { - return rc; - } - - return (regs.eax & 0xff); + return i8k_smm(®s) ? : regs.eax & 0xff; } /* @@ -226,16 +204,10 @@ static int i8k_get_fan_status(int fan) */ static int i8k_get_fan_speed(int fan) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + struct smm_regs regs = { .eax = I8K_SMM_GET_SPEED, }; - regs.eax = I8K_SMM_GET_SPEED; regs.ebx = fan & 0xff; - if ((rc = i8k_smm(®s)) < 0) { - return rc; - } - - return (regs.eax & 0xffff) * I8K_FAN_MULT; + return i8k_smm(®s) ? : (regs.eax & 0xffff) * I8K_FAN_MULT; } /* @@ -243,18 +215,12 @@ static int i8k_get_fan_speed(int fan) */ static int i8k_set_fan(int fan, int speed) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; - int rc; + struct smm_regs regs = { .eax = I8K_SMM_SET_FAN, }; speed = (speed < 0) ? 0 : ((speed > I8K_FAN_MAX) ? I8K_FAN_MAX : speed); - - regs.eax = I8K_SMM_SET_FAN; regs.ebx = (fan & 0xff) | (speed << 8); - if ((rc = i8k_smm(®s)) < 0) { - return rc; - } - return (i8k_get_fan_status(fan)); + return i8k_smm(®s) ? : i8k_get_fan_status(fan); } /* @@ -262,18 +228,17 @@ static int i8k_set_fan(int fan, int speed) */ static int i8k_get_cpu_temp(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + struct smm_regs regs = { .eax = I8K_SMM_GET_TEMP, }; int rc; int temp; #ifdef I8K_TEMPERATURE_BUG - static int prev = 0; + static int prev; #endif - regs.eax = I8K_SMM_GET_TEMP; - if ((rc = i8k_smm(®s)) < 0) { + if ((rc = i8k_smm(®s)) < 0) return rc; - } + temp = regs.eax & 0xff; #ifdef I8K_TEMPERATURE_BUG @@ -297,19 +262,13 @@ static int i8k_get_cpu_temp(void) static int i8k_get_dell_signature(void) { - SMMRegisters regs = { 0, 0, 0, 0, 0, 0 }; + struct smm_regs regs = { .eax = I8K_SMM_GET_DELL_SIG, }; int rc; - regs.eax = I8K_SMM_GET_DELL_SIG; - if ((rc = i8k_smm(®s)) < 0) { + if ((rc = i8k_smm(®s)) < 0) return rc; - } - if ((regs.eax == 1145651527) && (regs.edx == 1145392204)) { - return 0; - } else { - return -1; - } + return regs.eax == 1145651527 && regs.edx == 1145392204 ? 
0 : -1; } static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, @@ -346,29 +305,29 @@ static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, break; case I8K_GET_SPEED: - if (copy_from_user(&val, argp, sizeof(int))) { + if (copy_from_user(&val, argp, sizeof(int))) return -EFAULT; - } + val = i8k_get_fan_speed(val); break; case I8K_GET_FAN: - if (copy_from_user(&val, argp, sizeof(int))) { + if (copy_from_user(&val, argp, sizeof(int))) return -EFAULT; - } + val = i8k_get_fan_status(val); break; case I8K_SET_FAN: - if (restricted && !capable(CAP_SYS_ADMIN)) { + if (restricted && !capable(CAP_SYS_ADMIN)) return -EPERM; - } - if (copy_from_user(&val, argp, sizeof(int))) { + + if (copy_from_user(&val, argp, sizeof(int))) return -EFAULT; - } - if (copy_from_user(&speed, argp + 1, sizeof(int))) { + + if (copy_from_user(&speed, argp + 1, sizeof(int))) return -EFAULT; - } + val = i8k_set_fan(val, speed); break; @@ -376,25 +335,24 @@ static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, return -EINVAL; } - if (val < 0) { + if (val < 0) return val; - } switch (cmd) { case I8K_BIOS_VERSION: - if (copy_to_user(argp, &val, 4)) { + if (copy_to_user(argp, &val, 4)) return -EFAULT; - } + break; case I8K_MACHINE_ID: - if (copy_to_user(argp, buff, 16)) { + if (copy_to_user(argp, buff, 16)) return -EFAULT; - } + break; default: - if (copy_to_user(argp, &val, sizeof(int))) { + if (copy_to_user(argp, &val, sizeof(int))) return -EFAULT; - } + break; } @@ -415,11 +373,10 @@ static int i8k_proc_show(struct seq_file *seq, void *offset) left_speed = i8k_get_fan_speed(I8K_FAN_LEFT); /* 580 µs */ right_speed = i8k_get_fan_speed(I8K_FAN_RIGHT); /* 580 µs */ fn_key = i8k_get_fn_status(); /* 750 µs */ - if (power_status) { + if (power_status) ac_power = i8k_get_power_status(); /* 14700 µs */ - } else { + else ac_power = -1; - } /* * Info: @@ -528,10 +485,7 @@ static int __init i8k_probe(void) return 0; } -#ifdef MODULE -static -#endif -int __init i8k_init(void) +static int __init i8k_init(void) { struct proc_dir_entry *proc_i8k; @@ -554,19 +508,10 @@ int __init i8k_init(void) return 0; } -#ifdef MODULE -int init_module(void) +static void __exit i8k_exit(void) { - return i8k_init(); -} - -void cleanup_module(void) -{ - /* Remove the proc entry */ remove_proc_entry("i8k", NULL); - - printk(KERN_INFO "i8k: module unloaded\n"); } -#endif -/* end of file */ +module_init(i8k_init); +module_exit(i8k_exit); diff --git a/drivers/char/misc.c b/drivers/char/misc.c index f7e838eae19..31cf84d6902 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -66,7 +66,6 @@ static unsigned char misc_minors[DYNAMIC_MINORS / 8]; extern int rtc_DP8570A_init(void); extern int rtc_MK48T08_init(void); extern int pmu_device_init(void); -extern int i8k_init(void); #ifdef CONFIG_PROC_FS static void *misc_seq_start(struct seq_file *seq, loff_t *pos) @@ -312,9 +311,6 @@ static int __init misc_init(void) #endif #ifdef CONFIG_PMAC_PBOOK pmu_device_init(); -#endif -#ifdef CONFIG_I8K - i8k_init(); #endif if (register_chrdev(MISC_MAJOR,"misc",&misc_fops)) { printk("unable to get major %d for misc devices\n", -- cgit v1.2.3 From 7e0fa31dbf5968ce1e94f73c04a9402170432ecf Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 25 Jun 2005 14:54:28 -0700 Subject: [PATCH] I8K: add new BIOS signatures I8K: add BIOS signatures of a newer Dell laptops, also there can be more than one temperature sensor reported by BIOS. Lifted from driver 1.25 on Massimo Dal Zotto's site. 
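As a rough sketch of what the two changes mean inside the driver (identifiers are the ones added by this patch; this is illustrative rather than additional driver code):

	/* the probe accepts either of the two known signature requests */
	if (i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG1) &&
	    i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG2))
		return -ENODEV;		/* neither signature matched (unless forced) */

	/* temperature reads now take a sensor index, passed to SMM in EBX */
	cpu_temp = i8k_get_temp(0);	/* sensor 0 keeps the old behaviour */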
Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/i8k.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index 584e9a4a6f1..6c4b3f986d0 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -35,7 +35,8 @@ #define I8K_SMM_GET_FAN 0x00a3 #define I8K_SMM_GET_SPEED 0x02a3 #define I8K_SMM_GET_TEMP 0x10a3 -#define I8K_SMM_GET_DELL_SIG 0xffa3 +#define I8K_SMM_GET_DELL_SIG1 0xfea3 +#define I8K_SMM_GET_DELL_SIG2 0xffa3 #define I8K_SMM_BIOS_VERSION 0x00a6 #define I8K_FAN_MULT 30 @@ -226,7 +227,7 @@ static int i8k_set_fan(int fan, int speed) /* * Read the cpu temperature. */ -static int i8k_get_cpu_temp(void) +static int i8k_get_temp(int sensor) { struct smm_regs regs = { .eax = I8K_SMM_GET_TEMP, }; int rc; @@ -235,7 +236,7 @@ static int i8k_get_cpu_temp(void) #ifdef I8K_TEMPERATURE_BUG static int prev; #endif - + regs.ebx = sensor & 0xff; if ((rc = i8k_smm(®s)) < 0) return rc; @@ -260,9 +261,9 @@ static int i8k_get_cpu_temp(void) return temp; } -static int i8k_get_dell_signature(void) +static int i8k_get_dell_signature(int req_fn) { - struct smm_regs regs = { .eax = I8K_SMM_GET_DELL_SIG, }; + struct smm_regs regs = { .eax = req_fn, }; int rc; if ((rc = i8k_smm(®s)) < 0) @@ -301,7 +302,7 @@ static int i8k_ioctl(struct inode *ip, struct file *fp, unsigned int cmd, break; case I8K_GET_TEMP: - val = i8k_get_cpu_temp(); + val = i8k_get_temp(0); break; case I8K_GET_SPEED: @@ -367,7 +368,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset) int fn_key, cpu_temp, ac_power; int left_fan, right_fan, left_speed, right_speed; - cpu_temp = i8k_get_cpu_temp(); /* 11100 µs */ + cpu_temp = i8k_get_temp(0); /* 11100 µs */ left_fan = i8k_get_fan_status(I8K_FAN_LEFT); /* 580 µs */ right_fan = i8k_get_fan_status(I8K_FAN_RIGHT); /* 580 µs */ left_speed = i8k_get_fan_speed(I8K_FAN_LEFT); /* 580 µs */ @@ -421,6 +422,20 @@ static struct dmi_system_id __initdata i8k_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude"), }, }, + { + .ident = "Dell Inspiron 2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron"), + }, + }, + { + .ident = "Dell Latitude 2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude"), + }, + }, { } }; @@ -451,7 +466,8 @@ static int __init i8k_probe(void) /* * Get SMM Dell signature */ - if (i8k_get_dell_signature() != 0) { + if (i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG1) && + i8k_get_dell_signature(I8K_SMM_GET_DELL_SIG2)) { printk(KERN_ERR "i8k: unable to get SMM Dell signature\n"); if (!force) return -ENODEV; -- cgit v1.2.3 From 821fe94727e72d63e1abd1bc0b044c72dfad9fb6 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 25 Jun 2005 14:54:29 -0700 Subject: [PATCH] gcc4 compile fix for recent ia64 xpc changes Gcc4 doesn't like volatile casts as lvalues. Make the structure members volatile instead. 
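A minimal stand-alone illustration of the construct at issue (generic example code, not taken from the xpc sources):

	struct example {
		volatile unsigned char type;	/* the member itself is volatile */
	};

	static void set_type(struct example *e, unsigned char t)
	{
		/* The old code wrote "(volatile u8) e->type = t;"; gcc 3.x
		 * accepted that cast-as-lvalue as an extension, gcc 4 rejects
		 * it.  Declaring the member volatile and assigning directly
		 * compiles on both and still forces the compiler to emit the
		 * store rather than optimise or cache it. */
		e->type = t;
	}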
Signed-off-by: Dave Jones Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/sn/kernel/xpc.h | 12 ++++++------ arch/ia64/sn/kernel/xpc_channel.c | 12 ++++++------ arch/ia64/sn/kernel/xpc_partition.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h index 1a0aed8490d..d0ee635daf2 100644 --- a/arch/ia64/sn/kernel/xpc.h +++ b/arch/ia64/sn/kernel/xpc.h @@ -87,7 +87,7 @@ struct xpc_rsvd_page { u8 partid; /* partition ID from SAL */ u8 version; u8 pad[6]; /* pad to u64 align */ - u64 vars_pa; + volatile u64 vars_pa; u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; }; @@ -138,7 +138,7 @@ struct xpc_vars { * occupies half a cacheline. */ struct xpc_vars_part { - u64 magic; + volatile u64 magic; u64 openclose_args_pa; /* physical address of open and close args */ u64 GPs_pa; /* physical address of Get/Put values */ @@ -185,8 +185,8 @@ struct xpc_vars_part { * Define a Get/Put value pair (pointers) used with a message queue. */ struct xpc_gp { - s64 get; /* Get value */ - s64 put; /* Put value */ + volatile s64 get; /* Get value */ + volatile s64 put; /* Put value */ }; #define XPC_GP_SIZE \ @@ -231,7 +231,7 @@ struct xpc_openclose_args { */ struct xpc_notify { struct semaphore sema; /* notify semaphore */ - u8 type; /* type of notification */ + volatile u8 type; /* type of notification */ /* the following two fields are only used if type == XPC_N_CALL */ xpc_notify_func func; /* user's notify function */ @@ -439,7 +439,7 @@ struct xpc_partition { /* XPC infrastructure referencing and teardown control */ - u8 setup_state; /* infrastructure setup state */ + volatile u8 setup_state; /* infrastructure setup state */ wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ atomic_t references; /* #of references to infrastructure */ diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index 0bf6fbcc46d..6d02dac8056 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -209,7 +209,7 @@ xpc_setup_infrastructure(struct xpc_partition *part) * With the setting of the partition setup_state to XPC_P_SETUP, we're * declaring that this partition is ready to go. 
*/ - (volatile u8) part->setup_state = XPC_P_SETUP; + part->setup_state = XPC_P_SETUP; /* @@ -227,7 +227,7 @@ xpc_setup_infrastructure(struct xpc_partition *part) xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(smp_processor_id()); xpc_vars_part[partid].nchannels = part->nchannels; - (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC1; + xpc_vars_part[partid].magic = XPC_VP_MAGIC1; return xpcSuccess; } @@ -355,7 +355,7 @@ xpc_pull_remote_vars_part(struct xpc_partition *part) /* let the other side know that we've pulled their variables */ - (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC2; + xpc_vars_part[partid].magic = XPC_VP_MAGIC2; } if (pulled_entry->magic == XPC_VP_MAGIC1) { @@ -1183,7 +1183,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) */ xpc_clear_local_msgqueue_flags(ch); - (volatile s64) ch->w_remote_GP.get = ch->remote_GP.get; + ch->w_remote_GP.get = ch->remote_GP.get; dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " "channel=%d\n", ch->w_remote_GP.get, ch->partid, @@ -1211,7 +1211,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) */ xpc_clear_remote_msgqueue_flags(ch); - (volatile s64) ch->w_remote_GP.put = ch->remote_GP.put; + ch->w_remote_GP.put = ch->remote_GP.put; dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " "channel=%d\n", ch->w_remote_GP.put, ch->partid, @@ -1875,7 +1875,7 @@ xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type, notify = &ch->notify_queue[msg_number % ch->local_nentries]; notify->func = func; notify->key = key; - (volatile u8) notify->type = notify_type; + notify->type = notify_type; // >>> is a mb() needed here? diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index cd7ed73f0e7..578265ea9e6 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -253,7 +253,7 @@ xpc_rsvd_page_init(void) * This signifies to the remote partition that our reserved * page is initialized. */ - (volatile u64) rp->vars_pa = __pa(xpc_vars); + rp->vars_pa = __pa(xpc_vars); return rp; } -- cgit v1.2.3 From 7919a693bd735ed0aa93fc359ae09a588cfeb3bc Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 25 Jun 2005 14:54:30 -0700 Subject: [PATCH] Serial: remove unnecessary register_serial/unregister_serial A couple of drivers declare register_serial/unregister_serial prototypes but don't use them. FRV contains a commented out call to register_serial. Since these are deprecated, remove these unnecessary references. 
Signed-off-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/setup.c | 2 -- arch/xtensa/platform-iss/console.c | 3 --- drivers/char/amiserial.c | 4 ---- 3 files changed, 9 deletions(-) diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index ef6865f0b97..767ebb55bd8 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -790,12 +790,10 @@ void __init setup_arch(char **cmdline_p) #ifndef CONFIG_GDBSTUB_UART0 __reg(UART0_BASE + UART_IER * 8) = 0; early_serial_setup(&__frv_uart0); -// register_serial(&__frv_uart0); #endif #ifndef CONFIG_GDBSTUB_UART1 __reg(UART1_BASE + UART_IER * 8) = 0; early_serial_setup(&__frv_uart1); -// register_serial(&__frv_uart1); #endif #if defined(CONFIG_CHR_DEV_FLASH) || defined(CONFIG_BLK_DEV_FLASH) diff --git a/arch/xtensa/platform-iss/console.c b/arch/xtensa/platform-iss/console.c index 9e2b53f6a90..4fbddf92a92 100644 --- a/arch/xtensa/platform-iss/console.c +++ b/arch/xtensa/platform-iss/console.c @@ -198,9 +198,6 @@ static int rs_read_proc(char *page, char **start, off_t off, int count, } -int register_serial(struct serial_struct*); -void unregister_serial(int); - static struct tty_operations serial_ops = { .open = rs_open, .close = rs_close, diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 777bc499bbb..2a36561eec6 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -1973,10 +1973,6 @@ static _INLINE_ void show_serial_version(void) } -int register_serial(struct serial_struct *req); -void unregister_serial(int line); - - static struct tty_operations serial_ops = { .open = rs_open, .close = rs_close, -- cgit v1.2.3 From b4819b593740a6d11db07b52e0fe35975b29a185 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Sat, 25 Jun 2005 14:54:31 -0700 Subject: [PATCH] mips: add MIPS-specific support for flatmem/discontigmem 2.6.12-git6 doesn't boot on some MIPS machines. They need the support of flat memory and discontig memory. Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/Kconfig | 6 ++++++ arch/mips/kernel/setup.c | 4 ++++ arch/mips/mm/init.c | 4 ++-- arch/mips/mm/pgtable.c | 2 +- include/asm-mips/mmzone.h | 4 ++++ include/asm-mips/page.h | 2 +- include/asm-mips/pgtable.h | 2 +- 7 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 94f5a8eb2c2..bd9de7b00c0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1416,6 +1416,12 @@ config HIGHMEM bool "High Memory Support" depends on MIPS32 && (CPU_R3000 || CPU_SB1 || CPU_R7000 || CPU_RM9000 || CPU_R10000) && !(MACH_DECSTATION || MOMENCO_JAGUAR_ATX) +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +source "mm/Kconfig" + config SMP bool "Multi-Processing support" depends on CPU_RM9000 || (SIBYTE_SB1250 && !SIBYTE_STANDALONE) || SGI_IP27 diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 6018ca25ace..3a240e3e004 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -356,6 +357,8 @@ static inline void bootmem_init(void) } #endif + memory_present(0, first_usable_pfn, max_low_pfn); + /* Initialize the boot-time allocator with low memory only. 
*/ bootmap_size = init_bootmem(first_usable_pfn, max_low_pfn); @@ -557,6 +560,7 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(); bootmem_init(); + sparse_init(); paging_init(); resource_init(); } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 73843c52877..9c9a271c8a3 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -128,7 +128,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end, #endif /* CONFIG_MIPS64 */ #endif /* CONFIG_HIGHMEM */ -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES extern void pagetable_init(void); void __init paging_init(void) @@ -253,7 +253,7 @@ void __init mem_init(void) initsize >> 10, (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); } -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c index 3b88fdeef32..3fe94202da8 100644 --- a/arch/mips/mm/pgtable.c +++ b/arch/mips/mm/pgtable.c @@ -5,7 +5,7 @@ void show_mem(void) { -#ifndef CONFIG_DISCONTIGMEM /* XXX(hch): later.. */ +#ifndef CONFIG_NEED_MULTIPLE_NODES /* XXX(hch): later.. */ int pfn, total = 0, reserved = 0; int shared = 0, cached = 0; int highmem = 0; diff --git a/include/asm-mips/mmzone.h b/include/asm-mips/mmzone.h index 29ee13be0b2..d721143dbd4 100644 --- a/include/asm-mips/mmzone.h +++ b/include/asm-mips/mmzone.h @@ -8,6 +8,8 @@ #include #include +#ifdef CONFIG_DISCONTIGMEM + #define kvaddr_to_nid(kvaddr) pa_to_nid(__pa(kvaddr)) #define pfn_to_nid(pfn) pa_to_nid((pfn) << PAGE_SHIFT) @@ -36,4 +38,6 @@ /* XXX: FIXME -- wli */ #define kern_addr_valid(addr) (0) +#endif /* CONFIG_DISCONTIGMEM */ + #endif /* _ASM_MMZONE_H_ */ diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h index d1bf8240e73..5cae35cd9ba 100644 --- a/include/asm-mips/page.h +++ b/include/asm-mips/page.h @@ -127,7 +127,7 @@ static __inline__ int get_order(unsigned long size) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES #define pfn_to_page(pfn) (mem_map + (pfn)) #define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index 878843203d6..e76ccd6e3a5 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -350,7 +350,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, __update_cache(vma, address, pte); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES #define kern_addr_valid(addr) (1) #endif -- cgit v1.2.3 From 1e8a81c5a37907bc082025d3468718116dca1eeb Mon Sep 17 00:00:00 2001 From: Hifumi Hisashi Date: Sat, 25 Jun 2005 14:54:32 -0700 Subject: [PATCH] Fix the error handling in direct I/O Fix a bug on error handling in the direct I/O function. Currently, if a file is opened with the O_DIRECT|O_SYNC flag, the write() syscall cannot receive the EIO error after an I/O error (SCSI cable is disconnected etc.). Return values of other points that call generic_osync_inode() are treated appropriately. 
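Seen from userspace, the behaviour the fix restores looks roughly like this (a hedged sketch; the path, alignment and transfer size are only examples and must suit the underlying device):

	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		void *buf;
		int fd = open("/mnt/disk/testfile",
			      O_WRONLY | O_CREAT | O_DIRECT | O_SYNC, 0600);

		if (fd < 0 || posix_memalign(&buf, 512, 4096))
			return 1;
		memset(buf, 0, 4096);

		/* With the fix, an I/O error hit while syncing metadata for the
		 * O_SYNC write is propagated, so write() returns -1 with errno
		 * set (typically EIO) instead of reporting success. */
		if (write(fd, buf, 4096) < 0)
			perror("write");

		free(buf);
		return close(fd) ? 1 : 0;
	}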
Signed-off-by: Hisashi Hifumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 7332194d7af..b573607b711 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1851,8 +1851,11 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, * i_sem is held, which protects generic_osync_inode() from * livelocking. */ - if (written >= 0 && file->f_flags & O_SYNC) - generic_osync_inode(inode, mapping, OSYNC_METADATA); + if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); + if (err < 0) + written = err; + } if (written == count && !is_sync_kiocb(iocb)) written = -EIOCBQUEUED; return written; -- cgit v1.2.3 From 2d15cab85b85a56cc886037cab43cc292923ff22 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 25 Jun 2005 14:54:33 -0700 Subject: [PATCH] mm: fix remap_pte_range BUG Out-of-tree user of remap_pfn_range hit kernel BUG at mm/memory.c:1112! It passes an unrounded size to remap_pfn_range, which was okay before 2.6.12, but misses remap_pte_range's new end condition. An audit of all the other ptwalks confirms that this is the only one so exposed. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index c256175742a..beabdefa625 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1139,7 +1139,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, { pgd_t *pgd; unsigned long next; - unsigned long end = addr + size; + unsigned long end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; int err; -- cgit v1.2.3 From 6b9921976f0861e04828b3aff66696c1f3fd900d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Hernandez=20Garc=EDa-Hierro?= Date: Sat, 25 Jun 2005 14:54:34 -0700 Subject: [PATCH] selinux: add executable stack check This patch adds an execstack permission check that controls the ability to make the main process stack executable so that attempts to make the stack executable can still be prevented even if the process is allowed the existing execmem permission in order to e.g. perform runtime code generation. Note that this does not yet address thread stacks. Note also that unlike the execmem check, the execstack check is only applied on mprotect calls, not mmap calls, as the current security_file_mmap hook is not passed the necessary information presently. The original author of the code that makes the distinction of the stack region, is Ingo Molnar, who wrote it within his patch for /proc//maps markers. (http://marc.theaimsgroup.com/?l=linux-kernel&m=110719881508591&w=2) The patches also can be found at: http://pearls.tuxedo-es.org/patches/selinux/policy-execstack.patch http://pearls.tuxedo-es.org/patches/selinux/kernel-execstack.patch policy-execstack.patch is the patch that needs to be applied to the policy in order to support the execstack permission and exclude it from general_domain_access within macros/core_macros.te. kernel-execstack.patch adds such permission to the SELinux code within the kernel and adds the proper permission check to the selinux_file_mprotect() hook. 
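From userspace, the case the new permission governs is roughly the following (a sketch only; whether the mprotect() call succeeds depends on the loaded policy granting or denying process execstack to the domain):

	#include <stdint.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		char probe;		/* lives on the main process stack */
		long page = sysconf(_SC_PAGESIZE);
		void *stack_page =
			(void *)((uintptr_t)&probe & ~(uintptr_t)(page - 1));

		/* Making a page of the main stack executable now needs
		 * process:execstack in addition to any execmem permission
		 * the domain may already hold. */
		if (mprotect(stack_page, page,
			     PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
			perror("mprotect(stack, PROT_EXEC)");	/* EACCES when denied */
		return 0;
	}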
Signed-off-by: Lorenzo Hernandez Garcia-Hierro Acked-by: James Morris Acked-by: Stephen Smalley Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/selinux/hooks.c | 10 ++++++++++ security/selinux/include/av_perm_to_string.h | 1 + security/selinux/include/av_permissions.h | 1 + 3 files changed, 12 insertions(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 87302a49067..ad725213f56 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2488,6 +2488,16 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, if (rc) return rc; } + if (!vma->vm_file && (prot & PROT_EXEC) && + vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack) { + /* Attempt to make the process stack executable. + * This has an additional execstack check. + */ + rc = task_has_perm(current, current, PROCESS__EXECSTACK); + if (rc) + return rc; + } #endif return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED); diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h index 8928bb4d3c5..e81f0226c37 100644 --- a/security/selinux/include/av_perm_to_string.h +++ b/security/selinux/include/av_perm_to_string.h @@ -70,6 +70,7 @@ S_(SECCLASS_PROCESS, PROCESS__DYNTRANSITION, "dyntransition") S_(SECCLASS_PROCESS, PROCESS__SETCURRENT, "setcurrent") S_(SECCLASS_PROCESS, PROCESS__EXECMEM, "execmem") + S_(SECCLASS_PROCESS, PROCESS__EXECSTACK, "execstack") S_(SECCLASS_MSGQ, MSGQ__ENQUEUE, "enqueue") S_(SECCLASS_MSG, MSG__SEND, "send") S_(SECCLASS_MSG, MSG__RECEIVE, "receive") diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h index bdfce4ca8f8..38ce18b3328 100644 --- a/security/selinux/include/av_permissions.h +++ b/security/selinux/include/av_permissions.h @@ -465,6 +465,7 @@ #define PROCESS__DYNTRANSITION 0x00800000UL #define PROCESS__SETCURRENT 0x01000000UL #define PROCESS__EXECMEM 0x02000000UL +#define PROCESS__EXECSTACK 0x04000000UL #define IPC__CREATE 0x00000001UL #define IPC__DESTROY 0x00000002UL -- cgit v1.2.3 From 09ffd94fb15d85fbf9eebb8180f50264b264d6fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Hern=E1ndez=20Garc=EDa-Hierro?= Date: Sat, 25 Jun 2005 14:54:35 -0700 Subject: [PATCH] selinux: add executable heap check This patch,based on sample code by Roland McGrath, adds an execheap permission check that controls the ability to make the heap executable so that this can be prevented in almost all cases (the X server is presently an exception, but this will hopefully be resolved in the future) so that even programs with execmem permission will need to have the anonymous memory mapped in order to make it executable. The only reason that we use a permission check for such restriction (vs. making it unconditional) is that the X module loader presently needs it; it could possibly be made unconditional in the future when X is changed. 
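A similar hedged userspace sketch for this check (not part of the patch): making anonymous brk memory executable via mprotect() is expected to fail with EACCES when the domain is not granted process:execheap. The page arithmetic below is only an example.

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	void *old_brk = sbrk(2 * pagesz);	/* grow the heap by two pages */
	uintptr_t page;

	if (old_brk == (void *)-1)
		return 1;

	/* first page-aligned address inside the newly grown brk region */
	page = ((uintptr_t)old_brk + pagesz - 1) & ~((uintptr_t)pagesz - 1);

	/* Expected to fail with EACCES when the domain lacks process:execheap. */
	if (mprotect((void *)page, pagesz, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
		perror("mprotect(heap, PROT_EXEC)");

	return 0;
}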
The policy patch for the execheap permission is available at: http://pearls.tuxedo-es.org/patches/selinux/policy-execheap.patch Signed-off-by: Lorenzo Hernandez Garcia-Hierro Acked-by: James Morris Acked-by: Stephen Smalley Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- security/selinux/hooks.c | 11 +++++++++++ security/selinux/include/av_perm_to_string.h | 1 + security/selinux/include/av_permissions.h | 1 + 3 files changed, 13 insertions(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ad725213f56..932eef18db3 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2477,6 +2477,17 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, prot = reqprot; #ifndef CONFIG_PPC32 + if ((prot & PROT_EXEC) && !(vma->vm_flags & VM_EXECUTABLE) && + (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk)) { + /* + * We are making an executable mapping in the brk region. + * This has an additional execheap check. + */ + rc = task_has_perm(current, current, PROCESS__EXECHEAP); + if (rc) + return rc; + } if (vma->vm_file != NULL && vma->anon_vma != NULL && (prot & PROT_EXEC)) { /* * We are making executable a file mapping that has diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h index e81f0226c37..1deb59e1b76 100644 --- a/security/selinux/include/av_perm_to_string.h +++ b/security/selinux/include/av_perm_to_string.h @@ -71,6 +71,7 @@ S_(SECCLASS_PROCESS, PROCESS__SETCURRENT, "setcurrent") S_(SECCLASS_PROCESS, PROCESS__EXECMEM, "execmem") S_(SECCLASS_PROCESS, PROCESS__EXECSTACK, "execstack") + S_(SECCLASS_PROCESS, PROCESS__EXECHEAP, "execheap") S_(SECCLASS_MSGQ, MSGQ__ENQUEUE, "enqueue") S_(SECCLASS_MSG, MSG__SEND, "send") S_(SECCLASS_MSG, MSG__RECEIVE, "receive") diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h index 38ce18b3328..a78b5d59c9f 100644 --- a/security/selinux/include/av_permissions.h +++ b/security/selinux/include/av_permissions.h @@ -466,6 +466,7 @@ #define PROCESS__SETCURRENT 0x01000000UL #define PROCESS__EXECMEM 0x02000000UL #define PROCESS__EXECSTACK 0x04000000UL +#define PROCESS__EXECHEAP 0x08000000UL #define IPC__CREATE 0x00000001UL #define IPC__DESTROY 0x00000002UL -- cgit v1.2.3 From 62aa751d16399637325852bc0a1fcf13c2476dd7 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sat, 25 Jun 2005 14:54:36 -0700 Subject: [PATCH] ppc32: Check return of ppc_sys_get_pdata before accessing pointer Ensure that the returned pointer from ppc_sys_get_pdata is not NULL before we start using it. This handles any cases where we have variants of processors on the same board with different functionality. 
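The hunks below apply the same guard at every call site; in condensed form (identifiers taken from the patch, surrounding board-setup code elided) the pattern is:

	pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1);
	if (pdata) {
		/* only touch the platform data if this CPU variant has the device */
		pdata->interruptPHY = MPC85xx_IRQ_EXT5;
		pdata->phy_reg_addr += binfo->bi_immr_base;
		memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6);
	}
	/* without the check, a variant lacking this controller would oops
	 * on the NULL dereference during board setup */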
Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/83xx/mpc834x_sys.c | 28 ++++++++++++--------- arch/ppc/platforms/85xx/mpc8540_ads.c | 46 ++++++++++++++++++++--------------- arch/ppc/platforms/85xx/mpc8560_ads.c | 28 ++++++++++++--------- arch/ppc/platforms/85xx/sbc8560.c | 28 ++++++++++++--------- arch/ppc/platforms/85xx/stx_gp3.c | 26 +++++++++++--------- 5 files changed, 89 insertions(+), 67 deletions(-) diff --git a/arch/ppc/platforms/83xx/mpc834x_sys.c b/arch/ppc/platforms/83xx/mpc834x_sys.c index 37ece154279..ddd04d4c1ea 100644 --- a/arch/ppc/platforms/83xx/mpc834x_sys.c +++ b/arch/ppc/platforms/83xx/mpc834x_sys.c @@ -94,20 +94,24 @@ mpc834x_sys_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC83xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC83xx_IRQ_EXT1; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC83xx_IRQ_EXT1; + pdata->phyid = 0; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC83xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC83xx_IRQ_EXT2; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC83xx_IRQ_EXT2; + pdata->phyid = 1; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/mpc8540_ads.c b/arch/ppc/platforms/85xx/mpc8540_ads.c index a2ed611cd93..ddd2e9a5bb1 100644 --- a/arch/ppc/platforms/85xx/mpc8540_ads.c +++ b/arch/ppc/platforms/85xx/mpc8540_ads.c @@ -92,28 +92,34 @@ mpc8540ads_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 0; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); - - pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_FEC); - pdata->board_flags = 0; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 3; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet2addr, 6); + if (pdata) { + pdata->board_flags = 
FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 1; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } + + if (pdata) { + pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_FEC); + pdata->board_flags = 0; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 3; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet2addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/mpc8560_ads.c b/arch/ppc/platforms/85xx/mpc8560_ads.c index d87dfd5ce0a..e18380258b6 100644 --- a/arch/ppc/platforms/85xx/mpc8560_ads.c +++ b/arch/ppc/platforms/85xx/mpc8560_ads.c @@ -90,20 +90,24 @@ mpc8560ads_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 0; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 1; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/sbc8560.c b/arch/ppc/platforms/85xx/sbc8560.c index 3dbdd73618e..165df94d4aa 100644 --- a/arch/ppc/platforms/85xx/sbc8560.c +++ b/arch/ppc/platforms/85xx/sbc8560.c @@ -129,20 +129,24 @@ sbc8560_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT6; - pdata->phyid = 25; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT6; + pdata->phyid = 25; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT7; - pdata->phyid = 26; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT7; + pdata->phyid = 26; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef 
CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/stx_gp3.c b/arch/ppc/platforms/85xx/stx_gp3.c index 9455bb6b45e..bb41265cfc8 100644 --- a/arch/ppc/platforms/85xx/stx_gp3.c +++ b/arch/ppc/platforms/85xx/stx_gp3.c @@ -122,19 +122,23 @@ gp3_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); -/* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 2; - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + /* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 2; + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); -/* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 4; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + /* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 4; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) -- cgit v1.2.3 From 33d9e9b56d5ccd7776fdfe3ecce4a2793dee6fd3 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sat, 25 Jun 2005 14:54:37 -0700 Subject: [PATCH] ppc32: Add support for Freescale e200 (Book-E) core The e200 core is a Book-E core (similar to e500) that has a unified L1 cache and is not cache coherent on the bus. The e200 core also adds a separate exception level for debug exceptions. Part of this patch helps to cleanup a few cases that are true for all Freescale Book-E parts, not just e500. Signed-off-by: Kim Phillips Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/Kconfig | 17 ++++++----- arch/ppc/Makefile | 3 +- arch/ppc/kernel/Makefile | 2 ++ arch/ppc/kernel/cputable.c | 25 +++++++++++++++- arch/ppc/kernel/entry.S | 9 ++++++ arch/ppc/kernel/head_booke.h | 64 +++++++++++++++++++++++++++++++++++++++- arch/ppc/kernel/head_fsl_booke.S | 51 ++++++++++++++++++++++++++++++++ arch/ppc/kernel/misc.S | 8 +++++ arch/ppc/kernel/perfmon.c | 2 +- arch/ppc/kernel/traps.c | 24 +++++++++++++-- arch/ppc/mm/fsl_booke_mmu.c | 2 +- include/asm-ppc/mmu.h | 2 +- include/asm-ppc/mmu_context.h | 2 +- include/asm-ppc/ppc_asm.h | 2 ++ include/asm-ppc/reg.h | 1 + include/asm-ppc/reg_booke.h | 18 ++++++++++- 16 files changed, 214 insertions(+), 18 deletions(-) diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 848f43970a4..979590a9baf 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -88,6 +88,9 @@ config 8xx depends on BROKEN bool "8xx" +config E200 + bool "e200" + config E500 bool "e500" @@ -98,12 +101,12 @@ config PPC_FPU config BOOKE bool - depends on E500 + depends on E200 || E500 default y config FSL_BOOKE bool - depends on E500 + depends on E200 || E500 default y config PTE_64BIT @@ -141,16 +144,16 @@ config ALTIVEC config SPE bool "SPE Support" - depends on E500 + depends on E200 || E500 ---help--- This option enables kernel support for the Signal Processing Extensions (SPE) to the PowerPC processor. 
The kernel currently supports saving and restoring SPE registers, and turning on the 'spe enable' bit so user processes can execute SPE instructions. - This option is only usefully if you have a processor that supports + This option is only useful if you have a processor that supports SPE (e500, otherwise known as 85xx series), but does not have any - affect on a non-spe cpu (it does, however add code to the kernel). + effect on a non-spe cpu (it does, however add code to the kernel). If in doubt, say Y here. @@ -200,7 +203,7 @@ config TAU_AVERAGE config MATH_EMULATION bool "Math emulation" - depends on 4xx || 8xx || E500 + depends on 4xx || 8xx || E200 || E500 ---help--- Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -254,7 +257,7 @@ config PPC_STD_MMU config NOT_COHERENT_CACHE bool - depends on 4xx || 8xx + depends on 4xx || 8xx || E200 default y endmenu diff --git a/arch/ppc/Makefile b/arch/ppc/Makefile index 0432a25b473..f9b0d778dd8 100644 --- a/arch/ppc/Makefile +++ b/arch/ppc/Makefile @@ -29,7 +29,7 @@ CPP = $(CC) -E $(CFLAGS) CHECKFLAGS += -D__powerpc__ -ifndef CONFIG_E500 +ifndef CONFIG_FSL_BOOKE CFLAGS += -mstring endif @@ -38,6 +38,7 @@ cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_6xx) += -Wa,-maltivec cpu-as-$(CONFIG_POWER4) += -Wa,-maltivec cpu-as-$(CONFIG_E500) += -Wa,-me500 +cpu-as-$(CONFIG_E200) += -Wa,-me200 AFLAGS += $(cpu-as-y) CFLAGS += $(cpu-as-y) diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile index b284451802c..8276c0b551d 100644 --- a/arch/ppc/kernel/Makefile +++ b/arch/ppc/kernel/Makefile @@ -26,7 +26,9 @@ obj-$(CONFIG_KGDB) += ppc-stub.o obj-$(CONFIG_SMP) += smp.o smp-tbsync.o obj-$(CONFIG_TAU) += temp.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o +ifndef CONFIG_E200 obj-$(CONFIG_FSL_BOOKE) += perfmon_fsl_booke.o +endif ifndef CONFIG_MATH_EMULATION obj-$(CONFIG_8xx) += softemu8xx.o diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 01c226008db..50936cda0af 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -903,7 +903,30 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 32, }, #endif /* CONFIG_44x */ -#ifdef CONFIG_E500 +#ifdef CONFIG_FSL_BOOKE + { /* e200z5 */ + .pvr_mask = 0xfff00000, + .pvr_value = 0x81000000, + .cpu_name = "e200z5", + /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ + .cpu_features = CPU_FTR_USE_TB, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_EFP_SINGLE | + PPC_FEATURE_UNIFIED_CACHE, + .dcache_bsize = 32, + }, + { /* e200z6 */ + .pvr_mask = 0xfff00000, + .pvr_value = 0x81100000, + .cpu_name = "e200z6", + /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ + .cpu_features = CPU_FTR_USE_TB, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE | + PPC_FEATURE_UNIFIED_CACHE, + .dcache_bsize = 32, + }, { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S index 8377b6ca26d..d4df68629cc 100644 --- a/arch/ppc/kernel/entry.S +++ b/arch/ppc/kernel/entry.S @@ -60,6 +60,11 @@ mcheck_transfer_to_handler: TRANSFER_TO_HANDLER_EXC_LEVEL(MCHECK) b transfer_to_handler_full + .globl debug_transfer_to_handler +debug_transfer_to_handler: + TRANSFER_TO_HANDLER_EXC_LEVEL(DEBUG) + b transfer_to_handler_full + .globl crit_transfer_to_handler crit_transfer_to_handler: TRANSFER_TO_HANDLER_EXC_LEVEL(CRIT) @@ -835,6 +840,10 @@ ret_from_crit_exc: 
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI) #ifdef CONFIG_BOOKE + .globl ret_from_debug_exc +ret_from_debug_exc: + RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, RFDI) + .globl ret_from_mcheck_exc ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, RFMCI) diff --git a/arch/ppc/kernel/head_booke.h b/arch/ppc/kernel/head_booke.h index 9c50f9d2657..9342acf12e7 100644 --- a/arch/ppc/kernel/head_booke.h +++ b/arch/ppc/kernel/head_booke.h @@ -49,6 +49,7 @@ * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check + * On e200 we have critical and debug (machine check occurs via critical) * * Additionally we reserve a SPRG for each priority level so we can free up a * GPR to use as the base for indirect access to the exception stacks. This @@ -60,12 +61,16 @@ /* CRIT_SPRG only used in critical exception handling */ #define CRIT_SPRG SPRN_SPRG2 -/* MCHECK_SPRG only used in critical exception handling */ +/* MCHECK_SPRG only used in machine check exception handling */ #define MCHECK_SPRG SPRN_SPRG6W #define MCHECK_STACK_TOP (exception_stack_top - 4096) #define CRIT_STACK_TOP (exception_stack_top) +/* only on e200 for now */ +#define DEBUG_STACK_TOP (exception_stack_top - 4096) +#define DEBUG_SPRG SPRN_SPRG6W + #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ @@ -124,6 +129,8 @@ #define CRITICAL_EXCEPTION_PROLOG \ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) +#define DEBUG_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(DEBUG, SPRN_DSRR0, SPRN_DSRR1) #define MCHECK_EXCEPTION_PROLOG \ EXC_LEVEL_EXCEPTION_PROLOG(MCHECK, SPRN_MCSRR0, SPRN_MCSRR1) @@ -205,6 +212,60 @@ label: * save (and later restore) the MSR via SPRN_CSRR1, which will still have * the MSR_DE bit set. */ +#ifdef CONFIG_E200 +#define DEBUG_EXCEPTION \ + START_EXCEPTION(Debug); \ + DEBUG_EXCEPTION_PROLOG; \ + \ + /* \ + * If there is a single step or branch-taken exception in an \ + * exception entry sequence, it was probably meant to apply to \ + * the code where the exception occurred (since exception entry \ + * doesn't turn off DE automatically). We simulate the effect \ + * of turning off DE on entry to an exception handler by turning \ + * off DE in the CSRR1 value and clearing the debug status. \ + */ \ + mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ + andis. r10,r10,DBSR_IC@h; \ + beq+ 2f; \ + \ + lis r10,KERNELBASE@h; /* check if exception in vectors */ \ + ori r10,r10,KERNELBASE@l; \ + cmplw r12,r10; \ + blt+ 2f; /* addr below exception vectors */ \ + \ + lis r10,Debug@h; \ + ori r10,r10,Debug@l; \ + cmplw r12,r10; \ + bgt+ 2f; /* addr above exception vectors */ \ + \ + /* here it looks like we got an inappropriate debug exception. */ \ +1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CDRR1 value */ \ + lis r10,DBSR_IC@h; /* clear the IC event */ \ + mtspr SPRN_DBSR,r10; \ + /* restore state and get out */ \ + lwz r10,_CCR(r11); \ + lwz r0,GPR0(r11); \ + lwz r1,GPR1(r11); \ + mtcrf 0x80,r10; \ + mtspr SPRN_DSRR0,r12; \ + mtspr SPRN_DSRR1,r9; \ + lwz r9,GPR9(r11); \ + lwz r12,GPR12(r11); \ + mtspr DEBUG_SPRG,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \ + lwz r10,GPR10-INT_FRAME_SIZE(r8); \ + lwz r11,GPR11-INT_FRAME_SIZE(r8); \ + mfspr r8,DEBUG_SPRG; \ + \ + RFDI; \ + b .; \ + \ + /* continue normal handling for a critical exception... 
*/ \ +2: mfspr r4,SPRN_DBSR; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) +#else #define DEBUG_EXCEPTION \ START_EXCEPTION(Debug); \ CRITICAL_EXCEPTION_PROLOG; \ @@ -257,6 +318,7 @@ label: 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) +#endif #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S index ce36e88ba62..eb804b7a3cb 100644 --- a/arch/ppc/kernel/head_fsl_booke.S +++ b/arch/ppc/kernel/head_fsl_booke.S @@ -102,6 +102,7 @@ invstr: mflr r6 /* Make it accessible */ or r7,r7,r4 mtspr SPRN_MAS6,r7 tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */ +#ifndef CONFIG_E200 mfspr r7,SPRN_MAS1 andis. r7,r7,MAS1_VALID@h bne match_TLB @@ -118,6 +119,7 @@ invstr: mflr r6 /* Make it accessible */ or r7,r7,r4 mtspr SPRN_MAS6,r7 tlbsx 0,r6 /* Fall through, we had to match */ +#endif match_TLB: mfspr r7,SPRN_MAS0 rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */ @@ -196,8 +198,10 @@ skpinv: addi r6,r6,1 /* Increment */ /* 4. Clear out PIDs & Search info */ li r6,0 mtspr SPRN_PID0,r6 +#ifndef CONFIG_E200 mtspr SPRN_PID1,r6 mtspr SPRN_PID2,r6 +#endif mtspr SPRN_MAS6,r6 /* 5. Invalidate mapping we started in */ @@ -277,7 +281,9 @@ skpinv: addi r6,r6,1 /* Increment */ SET_IVOR(32, SPEUnavailable); SET_IVOR(33, SPEFloatingPointData); SET_IVOR(34, SPEFloatingPointRound); +#ifndef CONFIG_E200 SET_IVOR(35, PerformanceMonitor); +#endif /* Establish the interrupt vector base */ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ @@ -285,6 +291,9 @@ skpinv: addi r6,r6,1 /* Increment */ /* Setup the defaults for TLB entries */ li r2,(MAS4_TSIZED(BOOKE_PAGESZ_4K))@l +#ifdef CONFIG_E200 + oris r2,r2,MAS4_TLBSELD(1)@h +#endif mtspr SPRN_MAS4, r2 #if 0 @@ -293,6 +302,12 @@ skpinv: addi r6,r6,1 /* Increment */ oris r2,r2,HID0_DOZE@h mtspr SPRN_HID0, r2 #endif +#ifdef CONFIG_E200 + /* enable dedicated debug exception handling resources (Debug APU) */ + mfspr r2,SPRN_HID0 + ori r2,r2,HID0_DAPUEN@l + mtspr SPRN_HID0,r2 +#endif #if !defined(CONFIG_BDI_SWITCH) /* @@ -414,7 +429,12 @@ interrupt_base: CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) /* Machine Check Interrupt */ +#ifdef CONFIG_E200 + /* no RFMCI, MCSRRs on E200 */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#else MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) @@ -519,8 +539,13 @@ interrupt_base: /* Floating Point Unavailable Interrupt */ #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION +#else +#ifdef CONFIG_E200 + /* E200 treats 'normal' floating point instructions as FP Unavail exception */ + EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE) #else EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) +#endif #endif /* System Call Interrupt */ @@ -691,6 +716,7 @@ interrupt_base: /* * Local functions */ + /* * Data TLB exceptions will bail out to this point * if they can't resolve the lightweight TLB fault. 
@@ -761,6 +787,31 @@ END_FTR_SECTION_IFSET(CPU_FTR_BIG_PHYS) 2: rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ mtspr SPRN_MAS3, r11 #endif +#ifdef CONFIG_E200 + /* Round robin TLB1 entries assignment */ + mfspr r12, SPRN_MAS0 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r11, SPRN_TLB1CFG + andi. r11, r11, 0xfff + + /* Extract MAS0(NV) */ + andi. r13, r12, 0xfff + addi r13, r13, 1 + cmpw 0, r13, r11 + addi r12, r12, 1 + + /* check if we need to wrap */ + blt 7f + + /* wrap back to first free tlbcam entry */ + lis r13, tlbcam_index@ha + lwz r13, tlbcam_index@l(r13) + rlwimi r12, r13, 0, 20, 31 +7: + mtspr SPRN_MAS0,r12 +#endif /* CONFIG_E200 */ + tlbwe /* Done...restore registers and get out of here. */ diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 7329ef177a1..a3132f8e799 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -593,6 +593,14 @@ _GLOBAL(flush_instruction_cache) iccci 0,r3 #endif #elif CONFIG_FSL_BOOKE +BEGIN_FTR_SECTION + mfspr r3,SPRN_L1CSR0 + ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC + /* msync; isync recommended here */ + mtspr SPRN_L1CSR0,r3 + isync + blr +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 diff --git a/arch/ppc/kernel/perfmon.c b/arch/ppc/kernel/perfmon.c index 918f6b252e4..fa1dad96b83 100644 --- a/arch/ppc/kernel/perfmon.c +++ b/arch/ppc/kernel/perfmon.c @@ -36,7 +36,7 @@ /* A lock to regulate grabbing the interrupt */ DEFINE_SPINLOCK(perfmon_lock); -#ifdef CONFIG_FSL_BOOKE +#if defined (CONFIG_FSL_BOOKE) && !defined (CONFIG_E200) static void dummy_perf(struct pt_regs *regs) { unsigned int pmgc0 = mfpmr(PMRN_PMGC0); diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c index 2ca8ecfeefd..9e6ae569665 100644 --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -173,13 +173,13 @@ static inline int check_io_access(struct pt_regs *regs) /* On 4xx, the reason for the machine check or program exception is in the ESR. 
*/ #define get_reason(regs) ((regs)->dsisr) -#ifndef CONFIG_E500 +#ifndef CONFIG_FSL_BOOKE #define get_mc_reason(regs) ((regs)->dsisr) #else #define get_mc_reason(regs) (mfspr(SPRN_MCSR)) #endif #define REASON_FP ESR_FP -#define REASON_ILLEGAL ESR_PIL +#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR #define REASON_TRAP ESR_PTR @@ -302,7 +302,25 @@ void MachineCheckException(struct pt_regs *regs) printk("Bus - Instruction Parity Error\n"); if (reason & MCSR_BUS_RPERR) printk("Bus - Read Parity Error\n"); -#else /* !CONFIG_4xx && !CONFIG_E500 */ +#elif defined (CONFIG_E200) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_CP_PERR) + printk("Cache Push Parity Error\n"); + if (reason & MCSR_CPERR) + printk("Cache Parity Error\n"); + if (reason & MCSR_EXCP_ERR) + printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); + if (reason & MCSR_BUS_IRERR) + printk("Bus - Read Bus Error on instruction fetch\n"); + if (reason & MCSR_BUS_DRERR) + printk("Bus - Read Bus Error on data load\n"); + if (reason & MCSR_BUS_WRERR) + printk("Bus - Write Bus Error on buffered store or cache line push\n"); +#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ printk("Machine check in kernel mode.\n"); printk("Caused by (from SRR1=%lx): ", reason); switch (reason & 0x601F0000) { diff --git a/arch/ppc/mm/fsl_booke_mmu.c b/arch/ppc/mm/fsl_booke_mmu.c index e07990efa04..9fa884de5c7 100644 --- a/arch/ppc/mm/fsl_booke_mmu.c +++ b/arch/ppc/mm/fsl_booke_mmu.c @@ -126,7 +126,7 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys, flags |= _PAGE_COHERENT; #endif - TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); TLBCAM[index].MAS2 = virt & PAGE_MASK; diff --git a/include/asm-ppc/mmu.h b/include/asm-ppc/mmu.h index d465aee1c82..9205db404c7 100644 --- a/include/asm-ppc/mmu.h +++ b/include/asm-ppc/mmu.h @@ -405,7 +405,7 @@ typedef struct _P601_BAT { #define MAS0_TLBSEL(x) ((x << 28) & 0x30000000) #define MAS0_ESEL(x) ((x << 16) & 0x0FFF0000) -#define MAS0_NV 0x00000FFF +#define MAS0_NV(x) ((x) & 0x00000FFF) #define MAS1_VALID 0x80000000 #define MAS1_IPROT 0x40000000 diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h index 9222fa6ca17..ccabbce39d8 100644 --- a/include/asm-ppc/mmu_context.h +++ b/include/asm-ppc/mmu_context.h @@ -63,7 +63,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) #define LAST_CONTEXT 255 #define FIRST_CONTEXT 1 -#elif defined(CONFIG_E500) +#elif defined(CONFIG_E200) || defined(CONFIG_E500) #define NO_CONTEXT 256 #define LAST_CONTEXT 255 #define FIRST_CONTEXT 1 diff --git a/include/asm-ppc/ppc_asm.h b/include/asm-ppc/ppc_asm.h index 13fa8e7483c..f76221def48 100644 --- a/include/asm-ppc/ppc_asm.h +++ b/include/asm-ppc/ppc_asm.h @@ -174,6 +174,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) #define CLR_TOP32(r) #endif /* CONFIG_PPC64BRIDGE */ +#define RFCI .long 0x4c000066 /* rfci instruction */ +#define RFDI .long 0x4c00004e /* rfdi instruction */ #define RFMCI .long 0x4c00004c /* rfmci instruction */ #ifdef CONFIG_IBM405_ERR77 diff --git a/include/asm-ppc/reg.h b/include/asm-ppc/reg.h index c418aab7cd3..88b4222154d 100644 --- a/include/asm-ppc/reg.h +++ b/include/asm-ppc/reg.h @@ -160,6 
+160,7 @@ #define HID0_ICFI (1<<11) /* Instr. Cache Flash Invalidate */ #define HID0_DCI (1<<10) /* Data Cache Invalidate */ #define HID0_SPD (1<<9) /* Speculative disable */ +#define HID0_DAPUEN (1<<8) /* Debug APU enable */ #define HID0_SGE (1<<7) /* Store Gathering Enable */ #define HID0_SIED (1<<7) /* Serial Instr. Execution [Disable] */ #define HID0_DFCA (1<<6) /* Data Cache Flush Assist */ diff --git a/include/asm-ppc/reg_booke.h b/include/asm-ppc/reg_booke.h index 45c5e6f2b7a..00ad9c754c7 100644 --- a/include/asm-ppc/reg_booke.h +++ b/include/asm-ppc/reg_booke.h @@ -165,6 +165,8 @@ do { \ #define SPRN_MCSRR1 0x23B /* Machine Check Save and Restore Register 1 */ #define SPRN_MCSR 0x23C /* Machine Check Status Register */ #define SPRN_MCAR 0x23D /* Machine Check Address Register */ +#define SPRN_DSRR0 0x23E /* Debug Save and Restore Register 0 */ +#define SPRN_DSRR1 0x23F /* Debug Save and Restore Register 1 */ #define SPRN_MAS0 0x270 /* MMU Assist Register 0 */ #define SPRN_MAS1 0x271 /* MMU Assist Register 1 */ #define SPRN_MAS2 0x272 /* MMU Assist Register 2 */ @@ -264,6 +266,17 @@ do { \ #define MCSR_BUS_IPERR 0x00000002UL /* Instruction parity Error */ #define MCSR_BUS_RPERR 0x00000001UL /* Read parity Error */ #endif +#ifdef CONFIG_E200 +#define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */ +#define MCSR_CP_PERR 0x20000000UL /* Cache Push Parity Error */ +#define MCSR_CPERR 0x10000000UL /* Cache Parity Error */ +#define MCSR_EXCP_ERR 0x08000000UL /* ISI, ITLB, or Bus Error on 1st insn + fetch for an exception handler */ +#define MCSR_BUS_IRERR 0x00000010UL /* Read Bus Error on instruction fetch*/ +#define MCSR_BUS_DRERR 0x00000008UL /* Read Bus Error on data load */ +#define MCSR_BUS_WRERR 0x00000004UL /* Write Bus Error on buffered + store or cache line push */ +#endif /* Bit definitions for the DBSR. */ /* @@ -311,6 +324,7 @@ do { \ #define ESR_ST 0x00800000 /* Store Operation */ #define ESR_DLK 0x00200000 /* Data Cache Locking */ #define ESR_ILK 0x00100000 /* Instr. Cache Locking */ +#define ESR_PUO 0x00040000 /* Unimplemented Operation exception */ #define ESR_BO 0x00020000 /* Byte Ordering */ /* Bit definitions related to the DBCR0. */ @@ -387,10 +401,12 @@ do { \ #define ICCR_CACHE 1 /* Cacheable */ /* Bit definitions for L1CSR0. */ +#define L1CSR0_CLFC 0x00000100 /* Cache Lock Bits Flash Clear */ #define L1CSR0_DCFI 0x00000002 /* Data Cache Flash Invalidate */ +#define L1CSR0_CFI 0x00000002 /* Cache Flash Invalidate */ #define L1CSR0_DCE 0x00000001 /* Data Cache Enable */ -/* Bit definitions for L1CSR0. */ +/* Bit definitions for L1CSR1. */ #define L1CSR1_ICLFR 0x00000100 /* Instr Cache Lock Bits Flash Reset */ #define L1CSR1_ICFI 0x00000002 /* Instr Cache Flash Invalidate */ #define L1CSR1_ICE 0x00000001 /* Instr Cache Enable */ -- cgit v1.2.3 From 3d9077afea4927e488282da7189de9159db20c17 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sat, 25 Jun 2005 14:54:39 -0700 Subject: [PATCH] ppc32: Remove FSL OCP support Support for the OCP device model on Freescale (FSL) PPC's is no longer used. All FSL PPC's that were using OCP have be converted to using the platform device model. 
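For context, a hedged sketch of the platform-device registration model that replaces OCP here: the "fsl-i2c" name and struct fsl_i2c_platform_data come from the i2c-mpc patch that follows, while the header name, register base, size and IRQ number below are made-up examples, not values from any real board file.

#include <linux/device.h>
#include <linux/ioport.h>
#include <asm/fsl_devices.h>		/* assumed location of fsl_i2c_platform_data */

static struct fsl_i2c_platform_data mpc85xx_i2c_pdata = {
	.device_flags = 0,
};

static struct resource mpc85xx_i2c_resources[] = {
	{
		.start	= 0xe0003000,		/* example only: IMMR base + I2C offset */
		.end	= 0xe00030ff,
		.flags	= IORESOURCE_MEM,
	},
	{
		.start	= 43,			/* example IRQ number only */
		.end	= 43,
		.flags	= IORESOURCE_IRQ,
	},
};

static struct platform_device mpc85xx_i2c_device = {
	.name		= "fsl-i2c",
	.id		= 0,
	.dev		= {
		.platform_data	= &mpc85xx_i2c_pdata,
	},
	.num_resources	= ARRAY_SIZE(mpc85xx_i2c_resources),
	.resource	= mpc85xx_i2c_resources,
};

/* board setup code would then call platform_device_register(&mpc85xx_i2c_device) */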
Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/Kconfig.debug | 2 +- include/asm-ppc/fsl_ocp.h | 54 ----------------------------------------------- include/asm-ppc/ocp.h | 4 ---- 3 files changed, 1 insertion(+), 59 deletions(-) delete mode 100644 include/asm-ppc/fsl_ocp.h diff --git a/arch/ppc/Kconfig.debug b/arch/ppc/Kconfig.debug index d2e1eea8e8e..e16c7710d4b 100644 --- a/arch/ppc/Kconfig.debug +++ b/arch/ppc/Kconfig.debug @@ -66,7 +66,7 @@ config SERIAL_TEXT_DEBUG config PPC_OCP bool - depends on IBM_OCP || FSL_OCP || XILINX_OCP + depends on IBM_OCP || XILINX_OCP default y endmenu diff --git a/include/asm-ppc/fsl_ocp.h b/include/asm-ppc/fsl_ocp.h deleted file mode 100644 index 050fbba8d04..00000000000 --- a/include/asm-ppc/fsl_ocp.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * include/asm-ppc/fsl_ocp.h - * - * Definitions for the on-chip peripherals on Freescale PPC processors - * - * Maintainer: Kumar Gala (kumar.gala@freescale.com) - * - * Copyright 2004 Freescale Semiconductor, Inc - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifdef __KERNEL__ -#ifndef __ASM_FS_OCP_H__ -#define __ASM_FS_OCP_H__ - -/* A table of information for supporting the Gianfar Ethernet Controller - * This helps identify which enet controller we are dealing with, - * and what type of enet controller it is - */ -struct ocp_gfar_data { - uint interruptTransmit; - uint interruptError; - uint interruptReceive; - uint interruptPHY; - uint flags; - uint phyid; - uint phyregidx; - unsigned char mac_addr[6]; -}; - -/* Flags in the flags field */ -#define GFAR_HAS_COALESCE 0x20 -#define GFAR_HAS_RMON 0x10 -#define GFAR_HAS_MULTI_INTR 0x08 -#define GFAR_FIRM_SET_MACADDR 0x04 -#define GFAR_HAS_PHY_INTR 0x02 /* if not set use a timer */ -#define GFAR_HAS_GIGABIT 0x01 - -/* Data structure for I2C support. Just contains a couple flags - * to distinguish various I2C implementations*/ -struct ocp_fs_i2c_data { - uint flags; -}; - -/* Flags for I2C */ -#define FS_I2C_SEPARATE_DFSRR 0x02 -#define FS_I2C_CLOCK_5200 0x01 - -#endif /* __ASM_FS_OCP_H__ */ -#endif /* __KERNEL__ */ diff --git a/include/asm-ppc/ocp.h b/include/asm-ppc/ocp.h index c726f184519..983116f59d9 100644 --- a/include/asm-ppc/ocp.h +++ b/include/asm-ppc/ocp.h @@ -202,10 +202,6 @@ static DEVICE_ATTR(name##_##field, S_IRUGO, show_##name##_##field, NULL); #include #endif -#ifdef CONFIG_FSL_OCP -#include -#endif - #endif /* CONFIG_PPC_OCP */ #endif /* __OCP_H__ */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 912eaa7198827df3cae7d0c9768fd08e84a09675 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sat, 25 Jun 2005 14:54:39 -0700 Subject: [PATCH] I2C-MPC: Remove OCP device model support All consumers of the driver MPC10x, MPC52xx, MPC824x, MPC83xx, and MPC85xx are all using platform devices. 
We can get ride of the dead code to support using this driver with the old OCP based model Signed-off-by: Kumar Gala Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/i2c/busses/i2c-mpc.c | 204 ------------------------------------------- 1 file changed, 204 deletions(-) diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index d41ca31dbcb..03c23ce98ed 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -20,13 +20,7 @@ #include #include #include -#ifdef CONFIG_FSL_OCP -#include -#define FSL_I2C_DEV_SEPARATE_DFSRR FS_I2C_SEPARATE_DFSRR -#define FSL_I2C_DEV_CLOCK_5200 FS_I2C_CLOCK_5200 -#else #include -#endif #include #include #include @@ -294,204 +288,6 @@ static struct i2c_adapter mpc_ops = { .retries = 1 }; -#ifdef CONFIG_FSL_OCP -static int __devinit mpc_i2c_probe(struct ocp_device *ocp) -{ - int result = 0; - struct mpc_i2c *i2c; - - if (!(i2c = kmalloc(sizeof(*i2c), GFP_KERNEL))) { - return -ENOMEM; - } - memset(i2c, 0, sizeof(*i2c)); - - i2c->irq = ocp->def->irq; - i2c->flags = ((struct ocp_fs_i2c_data *)ocp->def->additions)->flags; - init_waitqueue_head(&i2c->queue); - - if (!request_mem_region(ocp->def->paddr, MPC_I2C_REGION, "i2c-mpc")) { - printk(KERN_ERR "i2c-mpc - resource unavailable\n"); - return -ENODEV; - } - - i2c->base = ioremap(ocp->def->paddr, MPC_I2C_REGION); - - if (!i2c->base) { - printk(KERN_ERR "i2c-mpc - failed to map controller\n"); - result = -ENOMEM; - goto fail_map; - } - - if (i2c->irq != OCP_IRQ_NA) - { - if ((result = request_irq(ocp->def->irq, mpc_i2c_isr, - SA_SHIRQ, "i2c-mpc", i2c)) < 0) { - printk(KERN_ERR - "i2c-mpc - failed to attach interrupt\n"); - goto fail_irq; - } - } else - i2c->irq = 0; - - mpc_i2c_setclock(i2c); - ocp_set_drvdata(ocp, i2c); - - i2c->adap = mpc_ops; - i2c_set_adapdata(&i2c->adap, i2c); - - if ((result = i2c_add_adapter(&i2c->adap)) < 0) { - printk(KERN_ERR "i2c-mpc - failed to add adapter\n"); - goto fail_add; - } - - return result; - - fail_add: - if (ocp->def->irq != OCP_IRQ_NA) - free_irq(ocp->def->irq, 0); - fail_irq: - iounmap(i2c->base); - fail_map: - release_mem_region(ocp->def->paddr, MPC_I2C_REGION); - kfree(i2c); - return result; -} -static void __devexit mpc_i2c_remove(struct ocp_device *ocp) -{ - struct mpc_i2c *i2c = ocp_get_drvdata(ocp); - i2c_del_adapter(&i2c->adap); - ocp_set_drvdata(ocp, NULL); - - if (ocp->def->irq != OCP_IRQ_NA) - free_irq(i2c->irq, i2c); - iounmap(i2c->base); - release_mem_region(ocp->def->paddr, MPC_I2C_REGION); - kfree(i2c); -} - -static struct ocp_device_id mpc_iic_ids[] __devinitdata = { - {.vendor = OCP_VENDOR_FREESCALE,.function = OCP_FUNC_IIC}, - {.vendor = OCP_VENDOR_INVALID} -}; - -MODULE_DEVICE_TABLE(ocp, mpc_iic_ids); - -static struct ocp_driver mpc_iic_driver = { - .name = "iic", - .id_table = mpc_iic_ids, - .probe = mpc_i2c_probe, - .remove = __devexit_p(mpc_i2c_remove) -}; - -static int __init iic_init(void) -{ - return ocp_register_driver(&mpc_iic_driver); -} - -static void __exit iic_exit(void) -{ - ocp_unregister_driver(&mpc_iic_driver); -} - -module_init(iic_init); -module_exit(iic_exit); -#else -static int fsl_i2c_probe(struct device *device) -{ - int result = 0; - struct mpc_i2c *i2c; - struct platform_device *pdev = to_platform_device(device); - struct fsl_i2c_platform_data *pdata; - struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - pdata = (struct fsl_i2c_platform_data *) pdev->dev.platform_data; - - if (!(i2c = kmalloc(sizeof(*i2c), GFP_KERNEL))) { - return -ENOMEM; - 
} - memset(i2c, 0, sizeof(*i2c)); - - i2c->irq = platform_get_irq(pdev, 0); - i2c->flags = pdata->device_flags; - init_waitqueue_head(&i2c->queue); - - i2c->base = ioremap((phys_addr_t)r->start, MPC_I2C_REGION); - - if (!i2c->base) { - printk(KERN_ERR "i2c-mpc - failed to map controller\n"); - result = -ENOMEM; - goto fail_map; - } - - if (i2c->irq != 0) - if ((result = request_irq(i2c->irq, mpc_i2c_isr, - SA_SHIRQ, "i2c-mpc", i2c)) < 0) { - printk(KERN_ERR - "i2c-mpc - failed to attach interrupt\n"); - goto fail_irq; - } - - mpc_i2c_setclock(i2c); - dev_set_drvdata(device, i2c); - - i2c->adap = mpc_ops; - i2c_set_adapdata(&i2c->adap, i2c); - i2c->adap.dev.parent = &pdev->dev; - if ((result = i2c_add_adapter(&i2c->adap)) < 0) { - printk(KERN_ERR "i2c-mpc - failed to add adapter\n"); - goto fail_add; - } - - return result; - - fail_add: - if (i2c->irq != 0) - free_irq(i2c->irq, NULL); - fail_irq: - iounmap(i2c->base); - fail_map: - kfree(i2c); - return result; -}; - -static int fsl_i2c_remove(struct device *device) -{ - struct mpc_i2c *i2c = dev_get_drvdata(device); - - i2c_del_adapter(&i2c->adap); - dev_set_drvdata(device, NULL); - - if (i2c->irq != 0) - free_irq(i2c->irq, i2c); - - iounmap(i2c->base); - kfree(i2c); - return 0; -}; - -/* Structure for a device driver */ -static struct device_driver fsl_i2c_driver = { - .name = "fsl-i2c", - .bus = &platform_bus_type, - .probe = fsl_i2c_probe, - .remove = fsl_i2c_remove, -}; - -static int __init fsl_i2c_init(void) -{ - return driver_register(&fsl_i2c_driver); -} - -static void __exit fsl_i2c_exit(void) -{ - driver_unregister(&fsl_i2c_driver); -} - -module_init(fsl_i2c_init); -module_exit(fsl_i2c_exit); - -#endif /* CONFIG_FSL_OCP */ - MODULE_AUTHOR("Adrian Cox "); MODULE_DESCRIPTION ("I2C-Bus adapter for MPC107 bridge and MPC824x/85xx/52xx processors"); -- cgit v1.2.3 From 2ec19faf617e61d56fb8da78ccb3ef895c00fae7 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sat, 25 Jun 2005 14:54:41 -0700 Subject: [PATCH] ppc32: remove some unnecessary includes of bootmem.h Continue the Good Fight: Limit bootmem.h include creep. 
Signed-off-by: Jon Loeliger Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/mm/44x_mmu.c | 1 - arch/ppc/mm/4xx_mmu.c | 1 - arch/ppc/mm/fsl_booke_mmu.c | 1 - arch/ppc/platforms/chrp_pci.c | 1 - arch/ppc/platforms/katana.c | 2 +- arch/ppc/platforms/pmac_pci.c | 1 - arch/ppc/syslib/cpm2_common.c | 2 +- arch/ppc/syslib/indirect_pci.c | 1 - arch/ppc/syslib/mv64x60.c | 1 - arch/ppc/syslib/mv64x60_win.c | 1 - 10 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/ppc/mm/44x_mmu.c b/arch/ppc/mm/44x_mmu.c index 72f7c0d1c0e..3d79ce281b6 100644 --- a/arch/ppc/mm/44x_mmu.c +++ b/arch/ppc/mm/44x_mmu.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c index a7f61614038..b7bcbc232f3 100644 --- a/arch/ppc/mm/4xx_mmu.c +++ b/arch/ppc/mm/4xx_mmu.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/arch/ppc/mm/fsl_booke_mmu.c b/arch/ppc/mm/fsl_booke_mmu.c index 9fa884de5c7..af9ca0eb6d5 100644 --- a/arch/ppc/mm/fsl_booke_mmu.c +++ b/arch/ppc/mm/fsl_booke_mmu.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include diff --git a/arch/ppc/platforms/chrp_pci.c b/arch/ppc/platforms/chrp_pci.c index 7d0ee308f66..7d3fbb5c5db 100644 --- a/arch/ppc/platforms/chrp_pci.c +++ b/arch/ppc/platforms/chrp_pci.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include diff --git a/arch/ppc/platforms/katana.c b/arch/ppc/platforms/katana.c index eda922ac316..169dbf6534b 100644 --- a/arch/ppc/platforms/katana.c +++ b/arch/ppc/platforms/katana.c @@ -27,12 +27,12 @@ #include #include #include -#include #include #include #ifdef CONFIG_BOOTIMG #include #endif +#include #include #include #include diff --git a/arch/ppc/platforms/pmac_pci.c b/arch/ppc/platforms/pmac_pci.c index f6ff5192406..719fb49fe2b 100644 --- a/arch/ppc/platforms/pmac_pci.c +++ b/arch/ppc/platforms/pmac_pci.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/ppc/syslib/cpm2_common.c b/arch/ppc/syslib/cpm2_common.c index ea5e77080e8..4c19a4ac716 100644 --- a/arch/ppc/syslib/cpm2_common.c +++ b/arch/ppc/syslib/cpm2_common.c @@ -21,8 +21,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/arch/ppc/syslib/indirect_pci.c b/arch/ppc/syslib/indirect_pci.c index a5a752609e2..e7148846970 100644 --- a/arch/ppc/syslib/indirect_pci.c +++ b/arch/ppc/syslib/indirect_pci.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c index 7b241e7876b..cc77177fa1c 100644 --- a/arch/ppc/syslib/mv64x60.c +++ b/arch/ppc/syslib/mv64x60.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/ppc/syslib/mv64x60_win.c b/arch/ppc/syslib/mv64x60_win.c index b6f0f5dcf6e..5b827e2bbe2 100644 --- a/arch/ppc/syslib/mv64x60_win.c +++ b/arch/ppc/syslib/mv64x60_win.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 856509d5da74aefeabe92a8df039cc472f4f7c5f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sat, 25 Jun 2005 14:54:42 -0700 Subject: [PATCH] ppc64: Fix compile warnings in arch/ppc64/kernel/lparcfg.c Stephen's patch to remove LparData.h missed an include in lparcfg.c This fixes a few compile warnings. 
Signed-off-by: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/lparcfg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 387923fcf9b..02e96627fa6 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -34,6 +34,7 @@ #include #include #include +#include #define MODULE_VERS "1.6" #define MODULE_NAME "lparcfg" -- cgit v1.2.3 From aea00143a8db8c0b31dca85bff3c325444d93f0f Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:42 -0700 Subject: [PATCH] dmi: move ACPI boot quirk This patch moves ACPI boot quirks out of dmi_scan.c Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 219 ++++++++++++++++++++++++++++++++++++++++++- arch/i386/kernel/dmi_scan.c | 163 -------------------------------- 2 files changed, 217 insertions(+), 165 deletions(-) diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 848bb97af7c..0771596c549 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -815,6 +816,218 @@ acpi_process_madt(void) return; } +extern int acpi_force; + +#ifdef __i386__ + +#ifdef CONFIG_ACPI_PCI +static int __init disable_acpi_irq(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", + d->ident); + acpi_noirq_set(); + } + return 0; +} + +static int __init disable_acpi_pci(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", + d->ident); + acpi_disable_pci(); + } + return 0; +} +#endif + +static int __init dmi_disable_acpi(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); + disable_acpi(); + } else { + printk(KERN_NOTICE + "Warning: DMI blacklist says broken, but acpi forced\n"); + } + return 0; +} + +/* + * Limit ACPI to CPU enumeration for HT + */ +static int __init force_acpi_ht(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); + disable_acpi(); + acpi_ht = 1; + } else { + printk(KERN_NOTICE + "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); + } + return 0; +} + +/* + * If your system is blacklisted here, but you find that acpi=force + * works for you, please contact acpi-devel@sourceforge.net + */ +static struct dmi_system_id __initdata acpi_dmi_table[] = { + /* + * Boxes that need ACPI disabled + */ + { + .callback = dmi_disable_acpi, + .ident = "IBM Thinkpad", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), + }, + }, + + /* + * Boxes that need acpi=ht + */ + { + .callback = force_acpi_ht, + .ident = "FSC Primergy T850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "DELL GX240", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "Compaq Workstation W8000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, 
"Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P4B266", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4B266"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P2B-DS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS CUR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ABIT i440BX-W83977", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ABIT "), + DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM Bladecenter", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eServer xSeries 360", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 330", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 440", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), + }, + }, + +#ifdef CONFIG_ACPI_PCI + /* + * Boxes that need ACPI PCI IRQ routing disabled + */ + { + .callback = disable_acpi_irq, + .ident = "ASUS A7V", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + DMI_MATCH(DMI_BOARD_NAME, ""), + /* newer BIOS, Revision 1011, does work */ + DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), + }, + }, + + /* + * Boxes that need ACPI PCI IRQ routing and PCI scan disabled + */ + { /* _BBN 0 bug */ + .callback = disable_acpi_pci, + .ident = "ASUS PR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), + DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), + DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") + }, + }, + { + .callback = disable_acpi_pci, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, +#endif +}; + +#endif /* __i386__ */ + /* * acpi_boot_table_init() and acpi_boot_init() * called from setup_arch(), always. 
@@ -843,6 +1056,10 @@ acpi_boot_table_init(void) { int error; +#ifdef __i386__ + dmi_check_system(acpi_dmi_table); +#endif + /* * If acpi_disabled, bail out * One exception: acpi=ht continues far enough to enumerate LAPICs @@ -870,8 +1087,6 @@ acpi_boot_table_init(void) */ error = acpi_blacklisted(); if (error) { - extern int acpi_force; - if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 3facd20212b..b4bae60f506 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -188,59 +188,6 @@ static __init int reset_videomode_after_s3(struct dmi_blacklist *d) #endif -#ifdef CONFIG_ACPI_BOOT -extern int acpi_force; - -static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); - disable_acpi(); - } else { - printk(KERN_NOTICE - "Warning: DMI blacklist says broken, but acpi forced\n"); - } - return 0; -} - -/* - * Limit ACPI to CPU enumeration for HT - */ -static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); - disable_acpi(); - acpi_ht = 1; - } else { - printk(KERN_NOTICE - "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); - } - return 0; -} -#endif - -#ifdef CONFIG_ACPI_PCI -static __init int disable_acpi_irq(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", - d->ident); - acpi_noirq_set(); - } - return 0; -} -static __init int disable_acpi_pci(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", - d->ident); - acpi_disable_pci(); - } - return 0; -} -#endif - /* * Process the DMI blacklists */ @@ -264,116 +211,6 @@ static __initdata struct dmi_blacklist dmi_blacklist[]={ } }, #endif -#ifdef CONFIG_ACPI_BOOT - /* - * If your system is blacklisted here, but you find that acpi=force - * works for you, please contact acpi-devel@sourceforge.net - */ - - /* - * Boxes that need ACPI disabled - */ - - { dmi_disable_acpi, "IBM Thinkpad", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "2629H1G"), - NO_MATCH, NO_MATCH }}, - - /* - * Boxes that need acpi=ht - */ - - { force_acpi_ht, "FSC Primergy T850", { - MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), - MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "DELL GX240", { - MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), - MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "HP VISUALIZE NT Workstation", { - MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "Compaq Workstation W8000", { - MATCH(DMI_SYS_VENDOR, "Compaq"), - MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS P4B266", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "P4B266"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS P2B-DS", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "P2B-DS"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS CUR-DLS", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "CUR-DLS"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ABIT i440BX-W83977", { - 
MATCH(DMI_BOARD_VENDOR, "ABIT "), - MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM Bladecenter", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eServer xSeries 360", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eserver xSeries 330", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eserver xSeries 440", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), - NO_MATCH, NO_MATCH }}, - -#endif // CONFIG_ACPI_BOOT - -#ifdef CONFIG_ACPI_PCI - /* - * Boxes that need ACPI PCI IRQ routing disabled - */ - - { disable_acpi_irq, "ASUS A7V", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), - MATCH(DMI_BOARD_NAME, ""), - /* newer BIOS, Revision 1011, does work */ - MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), - NO_MATCH }}, - - /* - * Boxes that need ACPI PCI IRQ routing and PCI scan disabled - */ - { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */ - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "PR-DLS"), - MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), - MATCH(DMI_BIOS_DATE, "03/21/2003") }}, - - { disable_acpi_pci, "Acer TravelMate 36x Laptop", { - MATCH(DMI_SYS_VENDOR, "Acer"), - MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - NO_MATCH, NO_MATCH - } }, - -#endif - { NULL, } }; -- cgit v1.2.3 From 0f8133a8db81ff824a4abbe5bb0f15bf034d31c3 Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:45 -0700 Subject: [PATCH] dmi: move ACPI sleep quirk This patch moves ACPI sleep quirk out of dmi_scan.c Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 1 + arch/i386/kernel/acpi/sleep.c | 27 +++++++++++++++++++++++++++ arch/i386/kernel/dmi_scan.c | 16 ---------------- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 0771596c549..9f63ae0f404 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -1024,6 +1024,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { }, }, #endif + { } }; #endif /* __i386__ */ diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index 28bb0514bb6..c1af93032ff 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str) __setup("acpi_sleep=", acpi_sleep_setup); + + +static __init int reset_videomode_after_s3(struct dmi_system_id *d) +{ + acpi_video_flags |= 2; + return 0; +} + +static __initdata struct dmi_system_id acpisleep_dmi_table[] = { + { /* Reset video mode after returning from ACPI S3 sleep */ + .callback = reset_videomode_after_s3, + .ident = "Toshiba Satellite 4030cdt", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), + }, + }, + { } +}; + +static int __init acpisleep_dmi_init(void) +{ + dmi_check_system(acpisleep_dmi_table); + return 0; +} + +core_initcall(acpisleep_dmi_init); diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index b4bae60f506..065c30a73c1 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -177,16 +177,6 @@ static 
__init int broken_toshiba_keyboard(struct dmi_blacklist *d) } -#ifdef CONFIG_ACPI_SLEEP -static __init int reset_videomode_after_s3(struct dmi_blacklist *d) -{ - /* See acpi_wakeup.S */ - extern long acpi_video_flags; - acpi_video_flags |= 2; - return 0; -} -#endif - /* * Process the DMI blacklists @@ -204,12 +194,6 @@ static __initdata struct dmi_blacklist dmi_blacklist[]={ MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), NO_MATCH, NO_MATCH, NO_MATCH } }, -#ifdef CONFIG_ACPI_SLEEP - { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */ - MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - NO_MATCH, NO_MATCH, NO_MATCH - } }, -#endif { NULL, } }; -- cgit v1.2.3 From b625883f24d018c989173aeb727f6de954fb154d Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:46 -0700 Subject: [PATCH] dmi: remove central blacklist Since last dmi quirk looks useless (it just prints 404 compliant url) we can finally remove central dmi blacklist. Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 45 +-------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 065c30a73c1..ad4b369a583 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -159,51 +159,11 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) printk(KERN_ERR "dmi_save_ident: out of memory.\n"); } -/* - * Ugly compatibility crap. - */ -#define dmi_blacklist dmi_system_id -#define NO_MATCH { DMI_NONE, NULL} -#define MATCH DMI_MATCH - -/* - * Toshiba keyboard likes to repeat keys when they are not repeated. - */ - -static __init int broken_toshiba_keyboard(struct dmi_blacklist *d) -{ - printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n"); - return 0; -} - - - -/* - * Process the DMI blacklists - */ - - -/* - * This will be expanded over time to force things like the APM - * interrupt mask settings according to the laptop - */ - -static __initdata struct dmi_blacklist dmi_blacklist[]={ - - { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */ - MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - NO_MATCH, NO_MATCH, NO_MATCH - } }, - - { NULL, } -}; - /* * Process a DMI table entry. Right now all we care about are the BIOS * and machine entries. For 2.5 we should pull the smbus controller info * out of here. */ - static void __init dmi_decode(struct dmi_header *dm) { #ifdef DMI_DEBUG @@ -253,10 +213,7 @@ static void __init dmi_decode(struct dmi_header *dm) void __init dmi_scan_machine(void) { - int err = dmi_iterate(dmi_decode); - if(err == 0) - dmi_check_system(dmi_blacklist); - else + if (dmi_iterate(dmi_decode)) printk(KERN_INFO "DMI not present.\n"); } -- cgit v1.2.3 From 1249c5138f573890eae0c01f13d627094edcd55c Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Jun 2005 14:54:47 -0700 Subject: [PATCH] dmi: spring cleanup Whitespace and CodingStyle cleanup. No functionality changes. 
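For orientation before the reflowed dmi_string()/dmi_table() in the diff below: each DMI record is a small formatted header (type, length, handle) plus fixed data, followed by an unformatted area of NUL-terminated strings ending in a double NUL; string references inside the formatted area are 1-based, with 0 meaning "no string". A standalone sketch of that walk (plain C outside the kernel, assumes <string.h>, names illustrative):

    /* Return the s-th string trailing a DMI record; s is 1-based, 0 = none. */
    static const char *dmi_string_sketch(const unsigned char *rec, unsigned char s)
    {
    	const char *bp = (const char *)rec + rec[1];	/* rec[1] is the length byte */

    	if (!s)
    		return "";
    	while (--s && *bp)
    		bp += strlen(bp) + 1;
    	return bp;
    }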
Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 163 +++++++++++++++++++------------------------- 1 file changed, 70 insertions(+), 93 deletions(-) diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index ad4b369a583..a3cdf894302 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -1,22 +1,15 @@ #include -#include #include #include #include -#include -#include -#include -#include -#include #include #include -struct dmi_header -{ - u8 type; - u8 length; - u16 handle; +struct dmi_header { + u8 type; + u8 length; + u16 handle; }; #undef DMI_DEBUG @@ -29,15 +22,13 @@ struct dmi_header static char * __init dmi_string(struct dmi_header *dm, u8 s) { - u8 *bp=(u8 *)dm; - bp+=dm->length; - if(!s) + u8 *bp = ((u8 *) dm) + dm->length; + + if (!s) return ""; s--; - while(s>0 && *bp) - { - bp+=strlen(bp); - bp++; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; s--; } return bp; @@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s) * We have to be cautious here. We have seen BIOSes with DMI pointers * pointing to completely the wrong place for example */ - -static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *)) +static int __init dmi_table(u32 base, int len, int num, + void (*decode)(struct dmi_header *)) { - u8 *buf; - struct dmi_header *dm; - u8 *data; - int i=0; + u8 *buf, *data; + int i = 0; buf = bt_ioremap(base, len); - if(buf==NULL) + if (buf == NULL) return -1; data = buf; @@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm * Stop when we see all the items the table claimed to have * OR we run off the end of the table (also happens) */ - - while(ilength; - while(data-buflength; + while ((data - buf < len - 1) && (data[0] || data[1])) data++; - if(data-buf>4, buf[14]&0x0F); + buf[14] >> 4, buf[14] & 0xF); else printk(KERN_INFO "DMI present.\n"); + dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", num, len)); - dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", - base)); - if(dmi_table(base,len, num, decode)==0) + dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base)); + + if (dmi_table(base,len, num, decode) == 0) return 0; } } @@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX]; /* * Save a DMI string */ - static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) { char *d = (char*)dm; char *p = dmi_string(dm, d[string]); - if(p==NULL || *p == 0) + + if (p == NULL || *p == 0) return; if (dmi_ident[slot]) return; - dmi_ident[slot] = alloc_bootmem(strlen(p)+1); + + dmi_ident[slot] = alloc_bootmem(strlen(p) + 1); if(dmi_ident[slot]) strcpy(dmi_ident[slot], p); else @@ -166,48 +156,35 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) */ static void __init dmi_decode(struct dmi_header *dm) { -#ifdef DMI_DEBUG - u8 *data = (u8 *)dm; -#endif + u8 *data __attribute__((__unused__)) = (u8 *)dm; - switch(dm->type) - { - case 0: - dmi_printk(("BIOS Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); - dmi_printk(("BIOS Version: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_BIOS_VERSION, 5); - dmi_printk(("BIOS Release: %s\n", - dmi_string(dm, data[8]))); - dmi_save_ident(dm, DMI_BIOS_DATE, 8); - break; - case 1: - dmi_printk(("System Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_SYS_VENDOR, 4); - dmi_printk(("Product Name: %s\n", - 
dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); - dmi_printk(("Version: %s\n", - dmi_string(dm, data[6]))); - dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); - dmi_printk(("Serial Number: %s\n", - dmi_string(dm, data[7]))); - dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); - break; - case 2: - dmi_printk(("Board Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); - dmi_printk(("Board Name: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_BOARD_NAME, 5); - dmi_printk(("Board Version: %s\n", - dmi_string(dm, data[6]))); - dmi_save_ident(dm, DMI_BOARD_VERSION, 6); - break; + switch(dm->type) { + case 0: + dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); + dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_BIOS_VERSION, 5); + dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8]))); + dmi_save_ident(dm, DMI_BIOS_DATE, 8); + break; + case 1: + dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_SYS_VENDOR, 4); + dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); + dmi_printk(("Version: %s\n", dmi_string(dm, data[6]))); + dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); + dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); + dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); + break; + case 2: + dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); + dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_BOARD_NAME, 5); + dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6]))); + dmi_save_ident(dm, DMI_BOARD_VERSION, 6); + break; } } -- cgit v1.2.3 From a13db56624c2a9d6c0dae0a693b25b0e58de9ea3 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 25 Jun 2005 14:54:48 -0700 Subject: [PATCH] CPU hotplug: fix hpet sectioning With hpet enabled, cpu hotplug uses some routines marked with __init. Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/time_hpet.c | 2 +- arch/i386/kernel/timers/common.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 10a0cbb88e7..658c0629ba6 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a) * comparator value and continue. Next tick can be caught by checking * for a change in the comparator value. Used in apic.c. 
*/ -static void __init wait_hpet_tick(void) +static void __devinit wait_hpet_tick(void) { unsigned int start_cmp_val, end_cmp_val; diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index 37353bd3180..8163fe0cf1f 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -86,7 +86,7 @@ bad_ctc: #define CALIBRATE_CNT_HPET (5 * hpet_tick) #define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) -unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) +unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) { unsigned long tsc_startlow, tsc_starthigh; unsigned long tsc_endlow, tsc_endhigh; -- cgit v1.2.3 From d92de65cab5980c16d4a1c326c1ef9a591892883 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 25 Jun 2005 14:54:49 -0700 Subject: [PATCH] variable overflow after hundreds round of hotplug CPU I'm doing the cpu hotplug stress test and found a variable ('ready') is overflow after several hundreds rounds of cpu hotplug. Here is a fix. Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/head.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e966fc8c44c..4477bb10709 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 - incb ready lgdt cpu_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f @@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP lldt %ax cld # gcc2 wants the direction flag cleared at all times #ifdef CONFIG_SMP - movb ready, %cl - cmpb $1,%cl + movb ready, %cl + movb $1, ready + cmpb $0,%cl je 1f # the first CPU calls start_kernel # all other CPUs call initialize_secondary call initialize_secondary -- cgit v1.2.3 From f370513640492641b4046bfd9a6e4714f6ae530d Mon Sep 17 00:00:00 2001 From: Zwane Mwaikambo Date: Sat, 25 Jun 2005 14:54:50 -0700 Subject: [PATCH] i386 CPU hotplug (The i386 CPU hotplug patch provides infrastructure for some work which Pavel is doing as well as for ACPI S3 (suspend-to-RAM) work which Li Shaohua is doing) The following provides i386 architecture support for safely unregistering and registering processors during runtime, updated for the current -mm tree. In order to avoid dumping cpu hotplug code into kernel/irq/* i dropped the cpu_online check in do_IRQ() by modifying fixup_irqs(). The difference being that on cpu offline, fixup_irqs() is called before we clear the cpu from cpu_online_map and a long delay in order to ensure that we never have any queued external interrupts on the APICs. There are additional changes to s390 and ppc64 to account for this change. 1) Add CONFIG_HOTPLUG_CPU 2) disable local APIC timer on dead cpus. 3) Disable preempt around irq balancing to prevent CPUs going down. 4) Print irq stats for all possible cpus. 5) Debugging check for interrupts on offline cpus. 6) Hacky fixup_irqs() to redirect irqs when cpus go off/online. 7) play_dead() for offline cpus to spin inside. 8) Handle offline cpus set in flush_tlb_others(). 9) Grab lock earlier in smp_call_function() to prevent CPUs going down. 10) Implement __cpu_disable() and __cpu_die(). 11) Enable local interrupts in cpu_enable() after fixup_irqs() 12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus. 13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline. 
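Taken together, the hooks added below give roughly this offline sequence (a simplified sketch, not text from the patch; locking, error paths and the mechanism that actually schedules take_cpu_down() onto the dying CPU are elided):

    /*
     * cpu_down(cpu)                       generic code, kernel/cpu.c
     *   take_cpu_down()                   runs on the CPU going away
     *     __cpu_disable()                 arch hook: disable the local APIC timer,
     *                                     redirect IRQs via fixup_irqs(), then clear
     *                                     the CPU from cpu_online_map
     *     sched_idle_next()               make the idle task run next
     *   idle loop sees cpu_is_offline()   and calls play_dead(), which acks CPU_DEAD
     *                                     and spins until the CPU is brought back up
     *   __cpu_die(cpu)                    arch hook: poll for the CPU_DEAD ack for
     *                                     about a second, warn if it never arrives
     */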
Signed-off-by: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 9 ++++ arch/i386/kernel/apic.c | 3 +- arch/i386/kernel/io_apic.c | 2 + arch/i386/kernel/irq.c | 67 ++++++++++++++++++++++------ arch/i386/kernel/process.c | 39 +++++++++++++++- arch/i386/kernel/smp.c | 24 ++++++---- arch/i386/kernel/smpboot.c | 98 ++++++++++++++++++++++++++++++++++++++--- arch/i386/kernel/traps.c | 8 ++++ arch/ia64/kernel/smpboot.c | 1 + arch/ppc64/kernel/pSeries_smp.c | 5 ++- arch/s390/kernel/smp.c | 4 +- include/asm-i386/cpu.h | 2 + include/asm-i386/irq.h | 4 ++ include/asm-i386/smp.h | 3 ++ kernel/cpu.c | 14 +++--- 15 files changed, 243 insertions(+), 40 deletions(-) diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index d4ae5f9ceae..b4cd11e5845 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1250,6 +1250,15 @@ config SCx200 This support is also available as a module. If compiled as a module, it will be called scx200. +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + + Say N. + source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 8d993fa7175..a28a088f3e7 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1048,7 +1049,7 @@ void __init setup_secondary_APIC_clock(void) setup_APIC_timer(calibration_result); } -void __init disable_APIC_timer(void) +void __devinit disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 08540bc4ba3..3c2b3bdfc80 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -576,9 +576,11 @@ static int balanced_irq(void *unused) try_to_freeze(PF_FREEZE); if (time_after(jiffies, prev_balance_time+balanced_irq_interval)) { + preempt_disable(); do_irq_balance(); prev_balance_time = jiffies; time_remaining = balanced_irq_interval; + preempt_enable(); } } return 0; diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 73945a3c53c..af115004aec 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); @@ -210,9 +213,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); seq_printf(p, " %s", action->name); @@ -240,16 +241,14 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", nmi_count(j)); + for_each_cpu(j) + seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); + for_each_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); @@ -259,3 +258,45 @@ skip: } return 0; } + +#ifdef CONFIG_HOTPLUG_CPU +#include + +void fixup_irqs(cpumask_t map) +{ + unsigned int irq; + static int warned; + + for (irq = 0; 
irq < NR_IRQS; irq++) { + cpumask_t mask; + if (irq == 2) + continue; + + cpus_and(mask, irq_affinity[irq], map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; + } + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + else if (irq_desc[irq].action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + +#if 0 + barrier(); + /* Ingo Molnar says: "after the IO-APIC masks have been redirected + [note the nop - the interrupt-enable boundary on x86 is two + instructions from sti] - to flush out pending hardirqs and + IPIs. After this point nothing is supposed to reach this CPU." */ + __asm__ __volatile__("sti; nop; cli"); + barrier(); +#else + /* That doesn't seem sufficient. Give it 1ms. */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); +#endif +} +#endif + diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index aea2ce1145d..c1b11e8df60 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -13,6 +13,7 @@ #include +#include #include #include #include @@ -55,6 +56,9 @@ #include #include +#include +#include + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); static int hlt_counter; @@ -143,14 +147,44 @@ static void poll_idle (void) } } +#ifdef CONFIG_HOTPLUG_CPU +#include +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void play_dead(void) +{ + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + /* We shouldn't have to disable interrupts while dead, but + * some interrupts just don't seem to go away, and this makes + * it "work" for testing purposes. */ + /* Death loop */ + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); + + local_irq_disable(); + __flush_tlb_all(); + cpu_set(smp_processor_id(), cpu_online_map); + enable_APIC_timer(); + local_irq_enable(); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ -void cpu_idle (void) +void cpu_idle(void) { + int cpu = raw_smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { @@ -165,6 +199,9 @@ void cpu_idle (void) if (!idle) idle = default_idle; + if (cpu_is_offline(cpu)) + play_dead(); + __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 68be7d0c723..35f521612b2 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) unsigned long flags; local_irq_save(flags); - + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. */ @@ -346,21 +347,21 @@ out: static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, unsigned long va) { - cpumask_t tmp; /* * A couple of (to be removed) sanity checks: * - * - we do not send IPIs to not-yet booted CPUs. * - current CPU must not be in mask * - mask must exist :) */ BUG_ON(cpus_empty(cpumask)); - - cpus_and(tmp, cpumask, cpu_online_map); - BUG_ON(!cpus_equal(cpumask, tmp)); BUG_ON(cpu_isset(smp_processor_id(), cpumask)); BUG_ON(!mm); + /* If a CPU which we ran on has gone down, OK. 
*/ + cpus_and(cpumask, cpumask, cpu_online_map); + if (cpus_empty(cpumask)) + return; + /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -476,6 +477,7 @@ void flush_tlb_all(void) */ void smp_send_reschedule(int cpu) { + WARN_ON(cpu_is_offline(cpu)); send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } @@ -516,10 +518,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, */ { struct call_data_struct data; - int cpus = num_online_cpus()-1; + int cpus; - if (!cpus) + /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); + cpus = num_online_cpus() - 1; + if (!cpus) { + spin_unlock(&call_lock); return 0; + } /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); @@ -531,7 +538,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, if (wait) atomic_set(&data.finished, 0); - spin_lock(&call_lock); call_data = &data; mb(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index c20d96d5c15..ad74a46e9ef 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -44,6 +44,9 @@ #include #include #include +#include +#include +#include #include #include @@ -96,6 +99,9 @@ static int trampoline_exec; static void map_cpu_to_logical_apicid(void); +/* State of each CPU. */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + /* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller @@ -1119,6 +1125,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus) who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) { + smp_commenced_mask = cpumask_of_cpu(0); + cpu_callin_map = cpumask_of_cpu(0); + mb(); smp_boot_cpus(max_cpus); } @@ -1128,20 +1137,99 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_callout_map); } -int __devinit __cpu_up(unsigned int cpu) +#ifdef CONFIG_HOTPLUG_CPU + +/* must be called with the cpucontrol mutex held */ +static int __devinit cpu_enable(unsigned int cpu) { - /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) { - local_irq_enable(); - return -ENOSYS; + /* get the target out of its holding state */ + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + wmb(); + + /* wait for the processor to ack it. timeout? */ + while (!cpu_online(cpu)) + cpu_relax(); + + fixup_irqs(cpu_online_map); + /* counter the disable in fixup_irqs() */ + local_irq_enable(); + return 0; +} + +int __cpu_disable(void) +{ + cpumask_t map = cpu_online_map; + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + /* We enable the timer again on the exit path of the death loop */ + disable_APIC_timer(); + /* Allow any queued timer interrupts to get serviced */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); + + cpu_clear(cpu, map); + fixup_irqs(map); + /* It's now safe to remove this processor from the online map */ + cpu_clear(cpu, cpu_online_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + /* We don't do anything here: idle task is faking death itself. 
*/ + unsigned int i; + + for (i = 0; i < 10; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + return; + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#else /* ... !CONFIG_HOTPLUG_CPU */ +int __cpu_disable(void) +{ + return -ENOSYS; +} +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +int __devinit __cpu_up(unsigned int cpu) +{ /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { + printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); local_irq_enable(); return -EIO; } +#ifdef CONFIG_HOTPLUG_CPU + /* Already up, and in cpu_quiescent now? */ + if (cpu_isset(cpu, smp_commenced_mask)) { + cpu_enable(cpu); + return 0; + } +#endif + local_irq_enable(); /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index e4d4e2162c7..207ea8ba716 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -625,6 +625,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) nmi_enter(); cpu = smp_processor_id(); + +#ifdef CONFIG_HOTPLUG_CPU + if (!cpu_online(cpu)) { + nmi_exit(); + return; + } +#endif + ++nmi_count(cpu); if (!nmi_callback(regs, cpu)) diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 3865f088ffa..a888ddc10f7 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -688,6 +688,7 @@ int __cpu_disable(void) return -EBUSY; remove_siblinginfo(cpu); + cpu_clear(cpu, cpu_online_map); fixup_irqs(); local_flush_tlb_all(); cpu_clear(cpu, cpu_callin_map); diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 30154140f7e..62c55a12356 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -93,10 +93,13 @@ static int query_cpu_stopped(unsigned int pcpu) int pSeries_cpu_disable(void) { + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_online_map); systemcfg->processorCount--; /*fix boot_cpuid here*/ - if (smp_processor_id() == boot_cpuid) + if (cpu == boot_cpuid) boot_cpuid = any_online_cpu(cpu_online_map); /* FIXME: abstract this to not be platform specific later on */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index fdfcf0488b4..93c71fef99d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -679,12 +679,14 @@ __cpu_disable(void) { unsigned long flags; ec_creg_mask_parms cr_parms; + int cpu = smp_processor_id(); spin_lock_irqsave(&smp_reserve_lock, flags); - if (smp_cpu_reserved[smp_processor_id()] != 0) { + if (smp_cpu_reserved[cpu] != 0) { spin_unlock_irqrestore(&smp_reserve_lock, flags); return -EBUSY; } + cpu_clear(cpu, cpu_online_map); #ifdef CONFIG_PFAULT /* Disable pfault pseudo page faults on this cpu. 
*/ diff --git a/include/asm-i386/cpu.h b/include/asm-i386/cpu.h index 002740b2195..e7252c216ca 100644 --- a/include/asm-i386/cpu.h +++ b/include/asm-i386/cpu.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -16,4 +17,5 @@ extern int arch_register_cpu(int num); extern void arch_unregister_cpu(int); #endif +DECLARE_PER_CPU(int, cpu_state); #endif /* _ASM_I386_CPU_H_ */ diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 05b9e61b0a7..e2d8bf23ad7 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -38,4 +38,8 @@ extern void release_vm86_irqs(struct task_struct *); extern int irqbalance_disable(char *str); #endif +#ifdef CONFIG_HOTPLUG_CPU +extern void fixup_irqs(cpumask_t map); +#endif + #endif /* _ASM_IRQ_H */ diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 55ef31f66bb..507f2fd39a6 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -83,6 +83,9 @@ static __inline int logical_smp_processor_id(void) } #endif + +extern int __cpu_disable(void); +extern void __cpu_die(unsigned int cpu); #endif /* !__ASSEMBLY__ */ #define NO_PROC_ID 0xFF /* No processor magic marker */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 628f4ccda12..53d8263ae12 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -63,19 +63,15 @@ static int take_cpu_down(void *unused) { int err; - /* Take offline: makes arch_cpu_down somewhat easier. */ - cpu_clear(smp_processor_id(), cpu_online_map); - /* Ensure this CPU doesn't handle any more interrupts. */ err = __cpu_disable(); if (err < 0) - cpu_set(smp_processor_id(), cpu_online_map); - else - /* Force idle task to run as soon as we yield: it should - immediately notice cpu is offline and die quickly. */ - sched_idle_next(); + return err; - return err; + /* Force idle task to run as soon as we yield: it should + immediately notice cpu is offline and die quickly. */ + sched_idle_next(); + return 0; } int cpu_down(unsigned int cpu) -- cgit v1.2.3 From 67664c8f7e74def5adf66298a1245d82af72db2c Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Jun 2005 14:54:52 -0700 Subject: [PATCH] i386: Dont use IPI broadcast when using cpu hotplug. This patch introduces a startup parameter no_broadcast. When we enable CONFIG_HOTPLUG_CPU, we dont want to use broadcast shortcut as it has ill effects on a offline cpu. If we issue broadcast, the IPI is also delivered to offline cpus, or partially up cpu causing stale IPI's to be handled, which is a problem and can cause undesirable effects. Introduces a new startup cmdline option no_ipi_broadcast, that can be switched at cmdline if necessary. 
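Usage sketch, assuming the option is wired up exactly as in the diff below: booting with

    no_ipi_broadcast=1

on the kernel command line selects the mask-based path even when CONFIG_HOTPLUG_CPU is not set (where the broadcast shortcut remains the default), and the chosen mode is reported at boot by the late_initcall as "Using IPI No-Shortcut mode". On the no-broadcast path, send_IPI_allbutself() builds a cpumask of the online CPUs minus the sender and hands it to send_IPI_mask() instead of using the APIC_DEST_ALLBUT shortcut, so offline or only partially started CPUs never see the IPI.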
Signed-off-by: Ashok Raj Acked-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mach-default/setup.c | 27 +++++++++++++++++++++++++++ include/asm-i386/mach-default/mach_ipi.h | 27 +++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c index 0aa08eaa893..e5a1a83d09e 100644 --- a/arch/i386/mach-default/setup.c +++ b/arch/i386/mach-default/setup.c @@ -10,6 +10,14 @@ #include #include +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +int no_broadcast=DEFAULT_SEND_IPI; + /** * pre_intr_init_hook - initialisation prior to setting up interrupt vectors * @@ -104,3 +112,22 @@ void __init mca_nmi_hook(void) printk("NMI generated from unknown source!\n"); } #endif + +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : + "IPI Broadcast"); + return 1; +} + +__setup("no_ipi_broadcast", no_ipi_broadcast); + +static int __init print_ipi_mode(void) +{ + printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : + "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); diff --git a/include/asm-i386/mach-default/mach_ipi.h b/include/asm-i386/mach-default/mach_ipi.h index 6f2b17a2008..cc756a67cd6 100644 --- a/include/asm-i386/mach-default/mach_ipi.h +++ b/include/asm-i386/mach-default/mach_ipi.h @@ -4,11 +4,34 @@ void send_IPI_mask_bitmask(cpumask_t mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); +extern int no_broadcast; + static inline void send_IPI_mask(cpumask_t mask, int vector) { send_IPI_mask_bitmask(mask, vector); } +static inline void __local_send_IPI_allbutself(int vector) +{ + if (no_broadcast) { + cpumask_t mask = cpu_online_map; + int this_cpu = get_cpu(); + + cpu_clear(this_cpu, mask); + send_IPI_mask(mask, vector); + put_cpu(); + } else + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); +} + +static inline void __local_send_IPI_all(int vector) +{ + if (no_broadcast) + send_IPI_mask(cpu_online_map, vector); + else + __send_IPI_shortcut(APIC_DEST_ALLINC, vector); +} + static inline void send_IPI_allbutself(int vector) { /* @@ -18,13 +41,13 @@ static inline void send_IPI_allbutself(int vector) if (!(num_online_cpus() > 1)) return; - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); + __local_send_IPI_allbutself(vector); return; } static inline void send_IPI_all(int vector) { - __send_IPI_shortcut(APIC_DEST_ALLINC, vector); + __local_send_IPI_all(vector); } #endif /* __ASM_MACH_IPI_H */ -- cgit v1.2.3 From 6fe940d6c300886de4ff1454d8ffd363172af433 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:53 -0700 Subject: [PATCH] sep initializing rework Make SEP init per-cpu, so it is hotplug safe. 
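In effect the vsyscall page is still set up exactly once, but the SYSENTER MSRs are now programmed by each CPU for itself from the identify path, which is what makes a hot-added CPU pick them up. A condensed sketch of the split introduced below (only the CS write is visible in this hunk; the ESP/EIP writes are assumed to sit in the unchanged tail of enable_sep_cpu()):

    /* boot CPU only: allocate and populate the vsyscall page */
    if (c == &boot_cpu_data)
    	sysenter_setup();

    /* every CPU, including ones hot-added later: program its own MSRs */
    enable_sep_cpu();	/* no-op if !boot_cpu_has(X86_FEATURE_SEP); otherwise:
    			 *   wrmsr(MSR_IA32_SYSENTER_CS,  __KERNEL_CS, 0);
    			 *   wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);            (assumed)
    			 *   wrmsr(MSR_IA32_SYSENTER_EIP, (long)sysenter_entry, 0);  (assumed)
    			 */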
Signed-off-by: Li Shaohua Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 3 +++ arch/i386/kernel/smp.c | 10 ++++++++++ arch/i386/kernel/smpboot.c | 11 +++++++++++ arch/i386/kernel/sysenter.c | 12 +++++++----- arch/i386/power/cpu.c | 6 +++--- include/asm-i386/processor.h | 2 ++ include/asm-i386/smp.h | 2 ++ 7 files changed, 38 insertions(+), 8 deletions(-) diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index b9954248d0a..d58e169fbdb 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -432,6 +432,9 @@ void __init identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE mcheck_init(c); #endif + if (c == &boot_cpu_data) + sysenter_setup(); + enable_sep_cpu(); } #ifdef CONFIG_X86_HT diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 35f521612b2..cec4bde6716 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -495,6 +495,16 @@ struct call_data_struct { int wait; }; +void lock_ipi_call_lock(void) +{ + spin_lock_irq(&call_lock); +} + +void unlock_ipi_call_lock(void) +{ + spin_unlock_irq(&call_lock); +} + static struct call_data_struct * call_data; /* diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index ad74a46e9ef..c5517f33230 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -449,7 +449,18 @@ static void __init start_secondary(void *unused) * the local TLBs too. */ local_flush_tlb(); + + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI receipients, and the time when the determination is made + * for which cpus receive the IPI. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + lock_ipi_call_lock(); cpu_set(smp_processor_id(), cpu_online_map); + unlock_ipi_call_lock(); /* We can take interrupts now: we're officially "up". 
*/ local_irq_enable(); diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 960d8bd137d..0bada1870bd 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -21,11 +21,16 @@ extern asmlinkage void sysenter_entry(void); -void enable_sep_cpu(void *info) +void enable_sep_cpu(void) { int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + if (!boot_cpu_has(X86_FEATURE_SEP)) { + put_cpu(); + return; + } + tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); @@ -41,7 +46,7 @@ void enable_sep_cpu(void *info) extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; -static int __init sysenter_setup(void) +int __init sysenter_setup(void) { void *page = (void *)get_zeroed_page(GFP_ATOMIC); @@ -58,8 +63,5 @@ static int __init sysenter_setup(void) &vsyscall_sysenter_start, &vsyscall_sysenter_end - &vsyscall_sysenter_start); - on_each_cpu(enable_sep_cpu, NULL, 1, 1); return 0; } - -__initcall(sysenter_setup); diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 6f521cf19a1..d099d01461f 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -22,9 +22,11 @@ #include #include #include + #include #include #include +#include static struct saved_context saved_context; @@ -33,8 +35,6 @@ unsigned long saved_context_esp, saved_context_ebp; unsigned long saved_context_esi, saved_context_edi; unsigned long saved_context_eflags; -extern void enable_sep_cpu(void *); - void __save_processor_state(struct saved_context *ctxt) { kernel_fpu_begin(); @@ -136,7 +136,7 @@ void __restore_processor_state(struct saved_context *ctxt) * sysenter MSRs */ if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(NULL); + enable_sep_cpu(); fix_processor_context(); do_fpu_end(); diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index c76c50e9622..6f0f93d0d41 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -691,5 +691,7 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c); #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern unsigned long boot_option_idle_override; +extern void enable_sep_cpu(void); +extern int sysenter_setup(void); #endif /* __ASM_I386_PROCESSOR_H */ diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 507f2fd39a6..2451ead0ca5 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -42,6 +42,8 @@ extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); extern void smp_invalidate_rcv(void); /* Process an NMI */ extern void (*mtrr_hook) (void); extern void zap_low_mappings (void); +extern void lock_ipi_call_lock(void); +extern void unlock_ipi_call_lock(void); #define MAX_APICID 256 extern u8 x86_cpu_to_apicid[]; -- cgit v1.2.3 From d720803a9365d360b3e5ea02033f0c11b5b1226a Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:54 -0700 Subject: [PATCH] sibling map initializing rework Make sibling map init per-cpu. Hotplug CPU may change the map at runtime. 
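Worked example (a hypothetical single-package, dual-core box with hyperthreading; CPU numbers are illustrative, not from the patch) of the maps that the new set_cpu_sibling_map() below maintains:

    /*
     *   phys_proc_id[] = { 0, 0, 0, 0 }      all four logical CPUs share one package
     *   cpu_core_id[]  = { 0, 0, 1, 1 }      two cores, two HT siblings each
     *
     *   cpu_sibling_map[0] = { 0, 1 }        logical CPUs sharing core 0
     *   cpu_core_map[0]    = { 0, 1, 2, 3 }  all logical CPUs in the same package
     */

Because the helper runs from start_secondary() for each CPU as it comes online, a hot-added CPU inserts itself into these maps instead of relying on the one-shot loop that used to run in smp_boot_cpus().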
Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 97 ++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 47 deletions(-) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index c5517f33230..09b4ceb832b 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -66,11 +66,21 @@ int smp_num_siblings = 1; #ifdef CONFIG_X86_HT EXPORT_SYMBOL(smp_num_siblings); #endif -int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ + +/* Package ID of each logical CPU */ +int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); -int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ + +/* Core ID of each logical CPU */ +int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); +cpumask_t cpu_sibling_map[NR_CPUS]; +EXPORT_SYMBOL(cpu_sibling_map); + +cpumask_t cpu_core_map[NR_CPUS]; +EXPORT_SYMBOL(cpu_core_map); + /* bitmap of online cpus */ cpumask_t cpu_online_map; EXPORT_SYMBOL(cpu_online_map); @@ -423,6 +433,38 @@ static void __init smp_callin(void) static int cpucount; +static inline void +set_cpu_sibling_map(int cpu) +{ + int i; + + if (smp_num_siblings > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (cpu_core_id[cpu] == cpu_core_id[i]) { + cpu_set(i, cpu_sibling_map[cpu]); + cpu_set(cpu, cpu_sibling_map[i]); + } + } + } else { + cpu_set(cpu, cpu_sibling_map[cpu]); + } + + if (current_cpu_data.x86_num_cores > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + } + } + } else { + cpu_core_map[cpu] = cpu_sibling_map[cpu]; + } +} + /* * Activate a secondary processor. 
*/ @@ -450,6 +492,10 @@ static void __init start_secondary(void *unused) */ local_flush_tlb(); + /* This must be done before setting cpu_online_map */ + set_cpu_sibling_map(raw_smp_processor_id()); + wmb(); + /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of @@ -912,13 +958,6 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -#ifdef CONFIG_X86_HT -EXPORT_SYMBOL(cpu_sibling_map); -#endif -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_core_map); - static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; @@ -1082,44 +1121,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_core_map[cpu]); } - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct cpuinfo_x86 *c = cpu_data + cpu; - int siblings = 0; - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; - - if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) { - printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); - smp_num_siblings = siblings; - } - - if (c->x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - } - } - } else { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - } - } + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); smpboot_setup_io_apic(); -- cgit v1.2.3 From 0bb3184df537002a742bafddf3f4fb482b7fe610 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:55 -0700 Subject: [PATCH] init call cleanup Trival patch for CPU hotplug. In CPU identify part, only did cleaup for intel CPUs. Need do for other CPUs if they support S3 SMP. Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 14 +++++++------- arch/i386/kernel/cpu/common.c | 30 +++++++++++++++--------------- arch/i386/kernel/cpu/intel.c | 12 ++++++------ arch/i386/kernel/cpu/intel_cacheinfo.c | 4 ++-- arch/i386/kernel/cpu/mcheck/mce.c | 2 +- arch/i386/kernel/cpu/mcheck/p5.c | 2 +- arch/i386/kernel/process.c | 2 +- arch/i386/kernel/setup.c | 2 +- arch/i386/kernel/smpboot.c | 18 +++++++++--------- arch/i386/kernel/timers/timer_tsc.c | 2 +- 10 files changed, 44 insertions(+), 44 deletions(-) diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index a28a088f3e7..b905d7bb9a0 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -364,7 +364,7 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __devinit setup_local_APIC(void) { unsigned long oldvalue, value, ver, maxlvt; @@ -635,7 +635,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __init apic_pm_activate(void) +static void __devinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -856,7 +856,7 @@ fake_ioapic_page: * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. 
*/ -static unsigned int __init get_8254_timer_count(void) +static unsigned int __devinit get_8254_timer_count(void) { extern spinlock_t i8253_lock; unsigned long flags; @@ -875,7 +875,7 @@ static unsigned int __init get_8254_timer_count(void) } /* next tick in 8254 can be caught by catching timer wraparound */ -static void __init wait_8254_wraparound(void) +static void __devinit wait_8254_wraparound(void) { unsigned int curr_count, prev_count; @@ -895,7 +895,7 @@ static void __init wait_8254_wraparound(void) * Default initialization for 8254 timers. If we use other timers like HPET, * we override this later */ -void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound; +void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; /* * This function sets up the local APIC timer, with a timeout of @@ -931,7 +931,7 @@ static void __setup_APIC_LVTT(unsigned int clocks) apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void __init setup_APIC_timer(unsigned int clocks) +static void __devinit setup_APIC_timer(unsigned int clocks) { unsigned long flags; @@ -1044,7 +1044,7 @@ void __init setup_boot_APIC_clock(void) local_irq_enable(); } -void __init setup_secondary_APIC_clock(void) +void __devinit setup_secondary_APIC_clock(void) { setup_APIC_timer(calibration_result); } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d58e169fbdb..aac74758caf 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); -static int cachesize_override __initdata = -1; -static int disable_x86_fxsr __initdata = 0; -static int disable_x86_serial_nr __initdata = 1; +static int cachesize_override __devinitdata = -1; +static int disable_x86_fxsr __devinitdata = 0; +static int disable_x86_serial_nr __devinitdata = 1; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __init get_model_name(struct cpuinfo_x86 *c) +int __devinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c) } -void __init display_cacheinfo(struct cpuinfo_x86 *c) +void __devinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. 
*/ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) +static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c) } -void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early) +void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) +static int __devinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -249,7 +249,7 @@ static void __init early_cpu_detect(void) #endif } -void __init generic_identify(struct cpuinfo_x86 * c) +void __devinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int junk; @@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c) } } -static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __init identify_cpu(struct cpuinfo_x86 *c) +void __devinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -438,7 +438,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_HT -void __init detect_ht(struct cpuinfo_x86 *c) +void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, tmp; @@ -493,7 +493,7 @@ void __init detect_ht(struct cpuinfo_x86 *c) } #endif -void __init print_cpu_info(struct cpuinfo_x86 *c) +void __devinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -516,7 +516,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -563,7 +563,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void __init cpu_init (void) +void __devinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 121aa2176e6..96a75d04583 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -28,7 +28,7 @@ extern int trap_init_f00f_bug(void); struct movsl_mask movsl_mask; #endif -void __init early_intel_workaround(struct cpuinfo_x86 *c) +void __devinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __init ppro_with_ram_bug(void) +int __devinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. 
*/ -static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __init num_cpu_cores(struct cpuinfo_x86 *c) +static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax; @@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __init init_intel(struct cpuinfo_x86 *c) +static void __devinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __initdata = { +static struct cpu_dev intel_cpu_dev __devinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index a710dc4eb20..1d768b26326 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -28,7 +28,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __initdata = +static struct _cache_table cache_table[] __devinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void) return retval; } -unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c) +unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index bf6d1aefafc..7218a7341fb 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_ void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; /* This has to be run for each processor */ -void __init mcheck_init(struct cpuinfo_x86 *c) +void __devinit mcheck_init(struct cpuinfo_x86 *c) { if (mce_disabled==1) return; diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c index c45a1b485c8..ec0614cd292 100644 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ b/arch/i386/kernel/cpu/mcheck/p5.c @@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting for processors with Intel style MCE */ -void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c1b11e8df60..e06f2dc7123 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -260,7 +260,7 @@ static void mwait_idle(void) } } -void __init select_idle_routine(const struct cpuinfo_x86 *c) +void __devinit select_idle_routine(const struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_MWAIT)) { printk("monitor/mwait feature present.\n"); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 30406fd0b64..cba67e4ba0a 100644 --- 
a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -60,7 +60,7 @@ address, and must not be in the .bss segment! */ unsigned long init_pg_tables_end __initdata = ~0UL; -int disable_pse __initdata = 0; +int disable_pse __devinitdata = 0; /* * Machine setup.. diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 09b4ceb832b..fb0b200d1d8 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -59,7 +59,7 @@ #include /* Set if we find a B stepping CPU */ -static int __initdata smp_b_stepping; +static int __devinitdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -118,7 +118,7 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 }; * has made sure it's suitably aligned. */ -static unsigned long __init setup_trampoline(void) +static unsigned long __devinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); return virt_to_phys(trampoline_base); @@ -148,7 +148,7 @@ void __init smp_alloc_memory(void) * a given CPU */ -static void __init smp_store_cpu_info(int id) +static void __devinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; @@ -342,7 +342,7 @@ extern void calibrate_delay(void); static atomic_t init_deasserted; -static void __init smp_callin(void) +static void __devinit smp_callin(void) { int cpuid, phys_id; unsigned long timeout; @@ -468,7 +468,7 @@ set_cpu_sibling_map(int cpu) /* * Activate a secondary processor. */ -static void __init start_secondary(void *unused) +static void __devinit start_secondary(void *unused) { /* * Dont put anything before smp_callin(), SMP @@ -521,7 +521,7 @@ static void __init start_secondary(void *unused) * from the task structure * This function must not return. */ -void __init initialize_secondary(void) +void __devinit initialize_secondary(void) { /* * We don't actually need to load the full TSS, @@ -635,7 +635,7 @@ static inline void __inquire_remote_apic(int apicid) * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ -static int __init +static int __devinit wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; @@ -681,7 +681,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) #endif /* WAKE_SECONDARY_VIA_NMI */ #ifdef WAKE_SECONDARY_VIA_INIT -static int __init +static int __devinit wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; @@ -817,7 +817,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) extern cpumask_t cpu_initialized; -static int __init do_boot_cpu(int apicid) +static int __devinit do_boot_cpu(int apicid) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 54c36b18202..f46e625bab6 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -33,7 +33,7 @@ static struct timer_opts timer_tsc; static inline void cpufreq_delayed_get(void); -int tsc_disable __initdata = 0; +int tsc_disable __devinitdata = 0; extern spinlock_t i8253_lock; -- cgit v1.2.3 From e1367daf3eed5cd619ee88c9907e1e6ddaa58406 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:54:56 -0700 Subject: [PATCH] cpu state clean after hot remove Clean CPU states in order to reuse smp boot code for CPU hotplug. Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 12 +++ arch/i386/kernel/irq.c | 5 ++ arch/i386/kernel/process.c | 20 +++-- arch/i386/kernel/smpboot.c | 175 ++++++++++++++++++++++++++++++++++-------- drivers/base/cpu.c | 8 +- include/asm-i386/irq.h | 2 + include/asm-i386/smp.h | 8 ++ 7 files changed, 187 insertions(+), 43 deletions(-) diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index aac74758caf..2203a9d2021 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -651,3 +651,15 @@ void __devinit cpu_init(void) clear_used_math(); mxcsr_feature_mask_init(); } + +#ifdef CONFIG_HOTPLUG_CPU +void __devinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index af115004aec..ce66dcc26d9 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -156,6 +156,11 @@ void irq_ctx_init(int cpu) cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); } +void irq_ctx_exit(int cpu) +{ + hardirq_ctx[cpu] = NULL; +} + extern asmlinkage void __do_softirq(void); asmlinkage void do_softirq(void) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index e06f2dc7123..5f8cfa6b794 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -152,21 +152,19 @@ static void poll_idle (void) /* We don't actually take CPU down, just spin without interrupts. */ static inline void play_dead(void) { + /* This must be done before dead CPU ack */ + cpu_exit_clear(); + wbinvd(); + mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; - /* We shouldn't have to disable interrupts while dead, but - * some interrupts just don't seem to go away, and this makes - * it "work" for testing purposes. */ - /* Death loop */ - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) - cpu_relax(); - + /* + * With physical CPU hotplug, we should halt the cpu + */ local_irq_disable(); - __flush_tlb_all(); - cpu_set(smp_processor_id(), cpu_online_map); - enable_APIC_timer(); - local_irq_enable(); + while (1) + __asm__ __volatile__("hlt":::"memory"); } #else static inline void play_dead(void) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index fb0b200d1d8..d66bf489a2e 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -90,6 +90,12 @@ cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); static cpumask_t smp_commenced_mask; +/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there + * is no way to resync one AP against BP. 
TBD: for prescott and above, we + * should use IA64's algorithm + */ +static int __devinitdata tsc_sync_disabled; + /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); @@ -427,7 +433,7 @@ static void __devinit smp_callin(void) /* * Synchronize the TSC with the BP */ - if (cpu_has_tsc && cpu_khz) + if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) synchronize_tsc_ap(); } @@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused) lock_ipi_call_lock(); cpu_set(smp_processor_id(), cpu_online_map); unlock_ipi_call_lock(); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) #endif /* WAKE_SECONDARY_VIA_INIT */ extern cpumask_t cpu_initialized; +static inline int alloc_cpu_id(void) +{ + cpumask_t tmp_map; + int cpu; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + if (cpu >= NR_CPUS) + return -ENODEV; + return cpu; +} + +#ifdef CONFIG_HOTPLUG_CPU +static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS]; +static inline struct task_struct * alloc_idle_task(int cpu) +{ + struct task_struct *idle; + + if ((idle = cpu_idle_tasks[cpu]) != NULL) { + /* initialize thread_struct. we really want to avoid destroy + * idle tread + */ + idle->thread.esp = (unsigned long)(((struct pt_regs *) + (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1); + init_idle(idle, cpu); + return idle; + } + idle = fork_idle(cpu); + + if (!IS_ERR(idle)) + cpu_idle_tasks[cpu] = idle; + return idle; +} +#else +#define alloc_idle_task(cpu) fork_idle(cpu) +#endif -static int __devinit do_boot_cpu(int apicid) +static int __devinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid) { struct task_struct *idle; unsigned long boot_error; - int timeout, cpu; + int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - cpu = ++cpucount; + ++cpucount; + /* * We can't use kernel_thread since we must avoid to * reschedule the child. */ - idle = fork_idle(cpu); + idle = alloc_idle_task(cpu); if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); idle->thread.eip = (unsigned long) start_secondary; @@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid) inquire_remote_apic(apicid); } } - x86_cpu_to_apicid[cpu] = apicid; + if (boot_error) { /* Try to put things back the way they were before ... 
*/ unmap_cpu_to_logical_apicid(cpu); cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpucount--; + } else { + x86_cpu_to_apicid[cpu] = apicid; + cpu_set(cpu, cpu_present_map); } /* mark "stuck" area as not stuck */ @@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid) return boot_error; } +#ifdef CONFIG_HOTPLUG_CPU +void cpu_exit_clear(void) +{ + int cpu = raw_smp_processor_id(); + + idle_task_exit(); + + cpucount --; + cpu_uninit(); + irq_ctx_exit(cpu); + + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + cpu_clear(cpu, cpu_present_map); + + cpu_clear(cpu, smp_commenced_mask); + unmap_cpu_to_logical_apicid(cpu); +} + +struct warm_boot_cpu_info { + struct completion *complete; + int apicid; + int cpu; +}; + +static void __devinit do_warm_boot_cpu(void *p) +{ + struct warm_boot_cpu_info *info = p; + do_boot_cpu(info->apicid, info->cpu); + complete(info->complete); +} + +int __devinit smp_prepare_cpu(int cpu) +{ + DECLARE_COMPLETION(done); + struct warm_boot_cpu_info info; + struct work_struct task; + int apicid, ret; + + lock_cpu_hotplug(); + apicid = x86_cpu_to_apicid[cpu]; + if (apicid == BAD_APICID) { + ret = -ENODEV; + goto exit; + } + + info.complete = &done; + info.apicid = apicid; + info.cpu = cpu; + INIT_WORK(&task, do_warm_boot_cpu, &info); + + tsc_sync_disabled = 1; + + /* init low mem mapping */ + memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); + flush_tlb_all(); + schedule_work(&task); + wait_for_completion(&done); + + tsc_sync_disabled = 0; + zap_low_mappings(); + ret = 0; +exit: + unlock_cpu_hotplug(); + return ret; +} +#endif + static void smp_tune_scheduling (void) { unsigned long cachesize; /* kB */ @@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) if (max_cpus <= cpucount+1) continue; - if (do_boot_cpu(apicid)) + if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) printk("CPU #%d not responding - cannot use it.\n", apicid); else @@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void) { cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callout_map); + cpu_set(smp_processor_id(), cpu_present_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } #ifdef CONFIG_HOTPLUG_CPU - -/* must be called with the cpucontrol mutex held */ -static int __devinit cpu_enable(unsigned int cpu) +static void +remove_siblinginfo(int cpu) { - /* get the target out of its holding state */ - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - wmb(); - - /* wait for the processor to ack it. timeout? 
*/ - while (!cpu_online(cpu)) - cpu_relax(); - - fixup_irqs(cpu_online_map); - /* counter the disable in fixup_irqs() */ - local_irq_enable(); - return 0; + int sibling; + + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[sibling]); + for_each_cpu_mask(sibling, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[sibling]); + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + phys_proc_id[cpu] = BAD_APICID; + cpu_core_id[cpu] = BAD_APICID; } int __cpu_disable(void) @@ -1193,6 +1307,8 @@ int __cpu_disable(void) mdelay(1); local_irq_disable(); + remove_siblinginfo(cpu); + cpu_clear(cpu, map); fixup_irqs(map); /* It's now safe to remove this processor from the online map */ @@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu) for (i = 0; i < 10; i++) { /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + printk ("CPU %d is now offline\n", cpu); return; + } current->state = TASK_UNINTERRUPTIBLE; schedule_timeout(HZ/10); } @@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu) return -EIO; } -#ifdef CONFIG_HOTPLUG_CPU - /* Already up, and in cpu_quiescent now? */ - if (cpu_isset(cpu, smp_commenced_mask)) { - cpu_enable(cpu); - return 0; - } -#endif - local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); while (!cpu_isset(cpu, cpu_online_map)) @@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif zap_low_mappings(); +#ifndef CONFIG_HOTPLUG_CPU /* * Disable executability of the SMP trampoline: */ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); +#endif } void __init smp_intr_init(void) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 6ef3069b571..bdd7e9f55c8 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -16,6 +16,10 @@ struct sysdev_class cpu_sysdev_class = { EXPORT_SYMBOL(cpu_sysdev_class); #ifdef CONFIG_HOTPLUG_CPU +#ifndef __HAVE_ARCH_SMP_PREPARE_CPU +#define smp_prepare_cpu(cpu) (0) +#endif + static ssize_t show_online(struct sys_device *dev, char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); @@ -36,7 +40,9 @@ static ssize_t store_online(struct sys_device *dev, const char *buf, kobject_hotplug(&dev->kobj, KOBJ_OFFLINE); break; case '1': - ret = cpu_up(cpu->sysdev.id); + ret = smp_prepare_cpu(cpu->sysdev.id); + if (ret == 0) + ret = cpu_up(cpu->sysdev.id); break; default: ret = -EINVAL; diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index e2d8bf23ad7..270f1986b19 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -29,9 +29,11 @@ extern void release_vm86_irqs(struct task_struct *); #ifdef CONFIG_4KSTACKS extern void irq_ctx_init(int cpu); + extern void irq_ctx_exit(int cpu); # define __ARCH_HAS_DO_SOFTIRQ #else # define irq_ctx_init(cpu) do { } while (0) +# define irq_ctx_exit(cpu) do { } while (0) #endif #ifdef CONFIG_IRQBALANCE diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 2451ead0ca5..c9996eda540 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -48,6 +48,14 @@ extern void unlock_ipi_call_lock(void); #define MAX_APICID 256 extern u8 x86_cpu_to_apicid[]; +#ifdef CONFIG_HOTPLUG_CPU +extern void cpu_exit_clear(void); +extern void cpu_uninit(void); + +#define __HAVE_ARCH_SMP_PREPARE_CPU +extern int smp_prepare_cpu(int cpu); +#endif + /* * This function is needed by all SMP systems. 
It must _always_ be valid * from the initial startup. We map APIC_BASE very early in page_setup(), -- cgit v1.2.3 From 52a119feaad92d44a0e97d01b22afbcbaf3fc079 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Jun 2005 14:54:57 -0700 Subject: [PATCH] make smp_prepare_cpu to a weak function I really wish smp_prepare_cpu() would disappear eventually. In the interim this is ideally a weak function, so we dont end up changing several places to define this dummy in headers. Today since the dummy declaration is done only in drivers/base/cpu.c but the function is called in kernel/power/smp.c i get undefined reference in my cpu hotplug code for x86_64 under development. Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/cpu.c | 9 +++++---- include/asm-i386/smp.h | 3 --- include/linux/cpu.h | 1 + 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index bdd7e9f55c8..0bf2dc11cdb 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -16,9 +16,10 @@ struct sysdev_class cpu_sysdev_class = { EXPORT_SYMBOL(cpu_sysdev_class); #ifdef CONFIG_HOTPLUG_CPU -#ifndef __HAVE_ARCH_SMP_PREPARE_CPU -#define smp_prepare_cpu(cpu) (0) -#endif +int __attribute__((weak)) smp_prepare_cpu (int cpu) +{ + return 0; +} static ssize_t show_online(struct sys_device *dev, char *buf) { @@ -41,7 +42,7 @@ static ssize_t store_online(struct sys_device *dev, const char *buf, break; case '1': ret = smp_prepare_cpu(cpu->sysdev.id); - if (ret == 0) + if (!ret) ret = cpu_up(cpu->sysdev.id); break; default: diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index c9996eda540..edad9b4712f 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -51,9 +51,6 @@ extern u8 x86_cpu_to_apicid[]; #ifdef CONFIG_HOTPLUG_CPU extern void cpu_exit_clear(void); extern void cpu_uninit(void); - -#define __HAVE_ARCH_SMP_PREPARE_CPU -extern int smp_prepare_cpu(int cpu); #endif /* diff --git a/include/linux/cpu.h b/include/linux/cpu.h index fe0298e5dae..e8904c0da68 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -69,6 +69,7 @@ extern struct semaphore cpucontrol; register_cpu_notifier(&fn##_nb); \ } int cpu_down(unsigned int cpu); +extern int __attribute__((weak)) smp_prepare_cpu(int cpu); #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #else #define lock_cpu_hotplug() do { } while (0) -- cgit v1.2.3 From e6982c671c560da4a0bc5c908cbcbec12bd5991d Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Jun 2005 14:54:58 -0700 Subject: [PATCH] x86_64: Change init sections for CPU hotplug support This patch adds __cpuinit and __cpuinitdata sections that need to exist past boot to support cpu hotplug. Caveat: This is done *only* for EM64T CPU Hotplug support, on request from Andi Kleen. Much of the generic hotplug code in kernel, and none of the other archs that support CPU hotplug today, i386, ia64, ppc64, s390 and parisc dont mark sections with __cpuinit, but only mark them as __devinit, and __devinitdata. If someone is motivated to change generic code, we need to make sure all existing hotplug code does not break, on other arch's that dont use __cpuinit, and __cpudevinit. 
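For illustration only (this sketch is not part of the patch and the names are made up), a per-CPU setup routine using the new markers would be written as below. With CONFIG_HOTPLUG_CPU=y the annotations expand to nothing, so the code and data survive past boot for hot-added CPUs; otherwise they fall back to __init/__initdata and are discarded with the init sections.

#include <linux/init.h>
#include <linux/kernel.h>

/* hypothetical tuning value kept around only when CPU hotplug is configured */
static int example_threshold __cpuinitdata = 16;

static void __cpuinit example_cpu_setup(int cpu)
{
	/* runs on the boot CPU and again on every CPU brought online later */
	printk(KERN_INFO "CPU%d: example setup, threshold %d\n",
	       cpu, example_threshold);
}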
Signed-off-by: Ashok Raj Acked-by: Andi Kleen Acked-by: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 8 ++++---- arch/x86_64/kernel/i387.c | 2 +- arch/x86_64/kernel/mce.c | 8 ++++---- arch/x86_64/kernel/mce_intel.c | 4 ++-- arch/x86_64/kernel/nmi.c | 4 ++-- arch/x86_64/kernel/process.c | 2 +- arch/x86_64/kernel/setup.c | 18 +++++++++--------- arch/x86_64/kernel/setup64.c | 6 +++--- arch/x86_64/kernel/smpboot.c | 15 +++++---------- arch/x86_64/mm/numa.c | 2 +- include/linux/init.h | 12 ++++++++++++ 11 files changed, 44 insertions(+), 37 deletions(-) diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index f8e6cc4fecd..2a6c893ccf6 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -285,7 +285,7 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __cpuinit setup_local_APIC (void) { unsigned int value, ver, maxlvt; @@ -534,7 +534,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __init apic_pm_activate(void) +static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -774,14 +774,14 @@ void __init setup_boot_APIC_clock (void) local_irq_enable(); } -void __init setup_secondary_APIC_clock(void) +void __cpuinit setup_secondary_APIC_clock(void) { local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); local_irq_enable(); } -void __init disable_APIC_timer(void) +void __cpuinit disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index ba139cac57c..d9b22b633e3 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c @@ -42,7 +42,7 @@ void mxcsr_feature_mask_init(void) * Called at bootup to set up the initial FPU state that is later cloned * into all processes. */ -void __init fpu_init(void) +void __cpuinit fpu_init(void) { unsigned long oldcr0 = read_cr0(); extern void __bad_fxsave_alignment(void); diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 3a89d735a4f..7ab15c8ab95 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -327,7 +327,7 @@ static void mce_init(void *dummy) } /* Add per CPU specific workarounds here */ -static void __init mce_cpu_quirks(struct cpuinfo_x86 *c) +static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { @@ -337,7 +337,7 @@ static void __init mce_cpu_quirks(struct cpuinfo_x86 *c) } } -static void __init mce_cpu_features(struct cpuinfo_x86 *c) +static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -352,7 +352,7 @@ static void __init mce_cpu_features(struct cpuinfo_x86 *c) * Called for each booted CPU to set up machine checks. * Must be called with preempt off. 
*/ -void __init mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { static cpumask_t mce_cpus __initdata = CPU_MASK_NONE; @@ -542,7 +542,7 @@ ACCESSOR(bank4ctl,bank[4],mce_restart()) ACCESSOR(tolerant,tolerant,) ACCESSOR(check_interval,check_interval,mce_restart()) -static __init int mce_init_device(void) +static __cpuinit int mce_init_device(void) { int err; if (!mce_available(&boot_cpu_data)) diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c index 4db9a640069..0be0a795981 100644 --- a/arch/x86_64/kernel/mce_intel.c +++ b/arch/x86_64/kernel/mce_intel.c @@ -42,7 +42,7 @@ done: irq_exit(); } -static void __init intel_init_thermal(struct cpuinfo_x86 *c) +static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; int tm2 = 0; @@ -93,7 +93,7 @@ static void __init intel_init_thermal(struct cpuinfo_x86 *c) return; } -void __init mce_intel_feature_init(struct cpuinfo_x86 *c) +void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); } diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 31c0f2e6ac9..4e44d6e6b7e 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -98,7 +98,7 @@ static unsigned int nmi_p4_cccr_val; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) -static __init inline int nmi_known_cpu(void) +static __cpuinit inline int nmi_known_cpu(void) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: @@ -110,7 +110,7 @@ static __init inline int nmi_known_cpu(void) } /* Run after command line and cpu_init init, but before all other checks */ -void __init nmi_watchdog_default(void) +void __cpuinit nmi_watchdog_default(void) { if (nmi_watchdog != NMI_DEFAULT) return; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index e59d1f9d616..15fa35a81a7 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -204,7 +204,7 @@ static void mwait_idle(void) } } -void __init select_idle_routine(const struct cpuinfo_x86 *c) +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { static int printed; if (cpu_has(c, X86_FEATURE_MWAIT)) { diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 000015dd5a8..ca9a915a6d4 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -676,7 +676,7 @@ void __init setup_arch(char **cmdline_p) #endif } -static int __init get_model_name(struct cpuinfo_x86 *c) +static int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; @@ -692,7 +692,7 @@ static int __init get_model_name(struct cpuinfo_x86 *c) } -static void __init display_cacheinfo(struct cpuinfo_x86 *c) +static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, eax, ebx, ecx, edx; @@ -803,7 +803,7 @@ static int __init init_amd(struct cpuinfo_x86 *c) return r; } -static void __init detect_ht(struct cpuinfo_x86 *c) +static void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; @@ -864,7 +864,7 @@ static void __init detect_ht(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __init intel_num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax; @@ -882,7 +882,7 @@ static int __init intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __init init_intel(struct cpuinfo_x86 *c) +static void 
__cpuinit init_intel(struct cpuinfo_x86 *c) { /* Cache sizes */ unsigned n; @@ -902,7 +902,7 @@ static void __init init_intel(struct cpuinfo_x86 *c) c->x86_num_cores = intel_num_cpu_cores(c); } -void __init get_cpu_vendor(struct cpuinfo_x86 *c) +void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -923,7 +923,7 @@ struct cpu_model_info { /* Do some early cpuid on the boot CPU to get some parameter that are needed before check_bugs. Everything advanced is in identify_cpu below. */ -void __init early_identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) { u32 tfms; @@ -977,7 +977,7 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) /* * This does the hard work of actually picking apart the CPU stuff... */ -void __init identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; u32 xlvl; @@ -1054,7 +1054,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) } -void __init print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) printk("%s", c->x86_model_id); diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 678b7ac33b8..f2b9c6bc999 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -29,7 +29,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; -cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned; @@ -171,7 +171,7 @@ void syscall_init(void) wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); } -void __init check_efer(void) +void __cpuinit check_efer(void) { unsigned long efer; @@ -188,7 +188,7 @@ void __init check_efer(void) * 'CPU state barrier', nothing should get across. * A lot of state is already set up in PDA init. */ -void __init cpu_init (void) +void __cpuinit cpu_init (void) { #ifdef CONFIG_SMP int cpu = stack_smp_processor_id(); diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index f1ec0f34594..bc98a6722cb 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -58,11 +58,6 @@ #include #include -/* Change for real CPU hotplug. Note other files need to be fixed - first too. */ -#define __cpuinit __init -#define __cpuinitdata __initdata - /* Number of siblings per CPU package */ int smp_num_siblings = 1; /* Package ID of each logical CPU */ @@ -823,7 +818,7 @@ static __cpuinit void smp_cleanup_boot(void) * * RED-PEN audit/test this more. I bet there is more state messed up here. */ -static __cpuinit void disable_smp(void) +static __init void disable_smp(void) { cpu_present_map = cpumask_of_cpu(0); cpu_possible_map = cpumask_of_cpu(0); @@ -838,7 +833,7 @@ static __cpuinit void disable_smp(void) /* * Handle user cpus=... parameter. */ -static __cpuinit void enforce_max_cpus(unsigned max_cpus) +static __init void enforce_max_cpus(unsigned max_cpus) { int i, k; k = 0; @@ -855,7 +850,7 @@ static __cpuinit void enforce_max_cpus(unsigned max_cpus) /* * Various sanity checks. */ -static int __cpuinit smp_sanity_check(unsigned max_cpus) +static int __init smp_sanity_check(unsigned max_cpus) { if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk("weird, boot CPU (#%d) not listed by the BIOS.\n", @@ -913,7 +908,7 @@ static int __cpuinit smp_sanity_check(unsigned max_cpus) * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. 
Just do some sanity checking here and enable APIC mode. */ -void __cpuinit smp_prepare_cpus(unsigned int max_cpus) +void __init smp_prepare_cpus(unsigned int max_cpus) { int i; @@ -1019,7 +1014,7 @@ int __cpuinit __cpu_up(unsigned int cpu) /* * Finish the SMP boot. */ -void __cpuinit smp_cpus_done(unsigned int max_cpus) +void __init smp_cpus_done(unsigned int max_cpus) { zap_low_mappings(); smp_cleanup_boot(); diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 84cde796ecb..ac61c186eb0 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -251,7 +251,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); } -__init void numa_add_cpu(int cpu) +__cpuinit void numa_add_cpu(int cpu) { /* BP is initialized elsewhere */ if (cpu) diff --git a/include/linux/init.h b/include/linux/init.h index 05c83e0521c..59008c3826c 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -229,6 +229,18 @@ void __init parse_early_param(void); #define __devexitdata __exitdata #endif +#ifdef CONFIG_HOTPLUG_CPU +#define __cpuinit +#define __cpuinitdata +#define __cpuexit +#define __cpuexitdata +#else +#define __cpuinit __init +#define __cpuinitdata __initdata +#define __cpuexit __exit +#define __cpuexitdata __exitdata +#endif + /* Functions marked as __devexit may be discarded at kernel link time, depending on config options. Newer versions of binutils detect references from retained sections to discarded sections and flag an error. Pointers to -- cgit v1.2.3 From 76e4f660d9f4c6d1bb473f72be2988c35eaca948 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Jun 2005 14:55:00 -0700 Subject: [PATCH] x86_64: CPU hotplug support Experimental CPU hotplug patch for x86_64 ----------------------------------------- This supports logical CPU online and offline. - Test with maxcpus=1, and then kick other cpu's off to test if init code is all cleaned up. CONFIG_SCHED_SMT works as well. - idle threads are forked on demand from keventd threads for clean startup TBD: 1. Not tested on a real NUMA machine (tested with numa=fake=2) 2. Handle ACPI pieces for physical hotplug support. 
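As a usage sketch (not part of the patch), the sysfs interface named in the Kconfig help text can be driven from userspace as shown below. It assumes root privileges and a kernel with CONFIG_HOTPLUG_CPU=y; the set_cpu_online() helper is hypothetical, but the "0"/"1" writes match what store_online() in drivers/base/cpu.c accepts.

#include <stdio.h>

/* write "0" (offline) or "1" (online) to /sys/devices/system/cpu/cpuN/online */
static int set_cpu_online(int cpu, int online)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", online);
	return fclose(f) ? -1 : 0;
}

int main(void)
{
	if (set_cpu_online(1, 0))	/* take CPU 1 offline */
		perror("offline cpu1");
	if (set_cpu_online(1, 1))	/* and bring it back */
		perror("online cpu1");
	return 0;
}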
Signed-off-by: Ashok Raj Acked-by: Andi Kleen Acked-by: Zwane Mwaikambo Signed-off-by: Shaohua.li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mach-default/topology.c | 15 ++- arch/x86_64/Kconfig | 9 ++ arch/x86_64/kernel/irq.c | 29 ++++++ arch/x86_64/kernel/process.c | 29 +++++- arch/x86_64/kernel/smpboot.c | 205 +++++++++++++++++++++++++++++++++++--- arch/x86_64/kernel/traps.c | 8 +- include/asm-x86_64/irq.h | 5 + include/asm-x86_64/smp.h | 2 + 8 files changed, 277 insertions(+), 25 deletions(-) diff --git a/arch/i386/mach-default/topology.c b/arch/i386/mach-default/topology.c index 5b3e8817dae..23395fff35d 100644 --- a/arch/i386/mach-default/topology.c +++ b/arch/i386/mach-default/topology.c @@ -73,12 +73,11 @@ static int __init topology_init(void) { int i; - for (i = 0; i < MAX_NUMNODES; i++) { - if (node_online(i)) - arch_register_node(i); - } - for (i = 0; i < NR_CPUS; i++) - if (cpu_possible(i)) arch_register_cpu(i); + for_each_online_node(i) + arch_register_node(i); + + for_each_cpu(i) + arch_register_cpu(i); return 0; } @@ -88,8 +87,8 @@ static int __init topology_init(void) { int i; - for (i = 0; i < NR_CPUS; i++) - if (cpu_possible(i)) arch_register_cpu(i); + for_each_cpu(i) + arch_register_cpu(i); return 0; } diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index db259757dc8..92f5a526602 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -313,6 +313,15 @@ config NR_CPUS This is purely to save memory - each supported CPU requires memory in the static kernel configuration. +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + + config HPET_TIMER bool default y diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 62b112e4deb..cc3fb85f514 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -106,3 +107,31 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) return 1; } +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(cpumask_t map) +{ + unsigned int irq; + static int warned; + + for (irq = 0; irq < NR_IRQS; irq++) { + cpumask_t mask; + if (irq == 2) + continue; + + cpus_and(mask, irq_affinity[irq], map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; + } + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + else if (irq_desc[irq].action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + + /* That doesn't seem sufficient. Give it 1ms. */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); +} +#endif diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 15fa35a81a7..1d91271796e 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -8,7 +8,8 @@ * * X86-64 port * Andi Kleen. - * + * + * CPU hotplug support - ashok.raj@intel.com * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $ */ @@ -18,6 +19,7 @@ #include +#include #include #include #include @@ -154,6 +156,29 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); +#ifdef CONFIG_HOTPLUG_CPU +DECLARE_PER_CPU(int, cpu_state); + +#include +/* We don't actually take CPU down, just spin without interrupts. 
*/ +static inline void play_dead(void) +{ + idle_task_exit(); + wbinvd(); + mb(); + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + while (1) + safe_halt(); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a @@ -174,6 +199,8 @@ void cpu_idle (void) idle = pm_idle; if (!idle) idle = default_idle; + if (cpu_is_offline(smp_processor_id())) + play_dead(); idle(); } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index bc98a6722cb..5a3f955b657 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -34,6 +34,7 @@ * Andi Kleen : Converted to new state machine. * Various cleanups. * Probably mostly hotplug CPU ready now. + * Ashok Raj : CPU hotplug support */ @@ -98,6 +99,37 @@ EXPORT_SYMBOL(cpu_core_map); extern unsigned char trampoline_data[]; extern unsigned char trampoline_end[]; +/* State of each CPU */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +/* + * Store all idle threads, this can be reused instead of creating + * a new thread. Also avoids complicated thread destroy functionality + * for idle threads. + */ +struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; + +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) + +/* + * cpu_possible_map should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_map on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * If cpu-hotplug is supported, then we need to preallocate for all + * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. + * - Ashok Raj + */ +#ifdef CONFIG_HOTPLUG_CPU +#define fixup_cpu_possible_map(x) cpu_set((x), cpu_possible_map) +#else +#define fixup_cpu_possible_map(x) +#endif + /* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller @@ -623,33 +655,77 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta return (send_status | accept_status); } +struct create_idle { + struct task_struct *idle; + struct completion done; + int cpu; +}; + +void do_fork_idle(void *_c_idle) +{ + struct create_idle *c_idle = _c_idle; + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + /* * Boot one CPU. */ static int __cpuinit do_boot_cpu(int cpu, int apicid) { - struct task_struct *idle; unsigned long boot_error; int timeout; unsigned long start_rip; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER(c_idle.done), + }; + DECLARE_WORK(work, do_fork_idle, &c_idle); + + c_idle.idle = get_idle_for_cpu(cpu); + + if (c_idle.idle) { + c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) + (THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1); + init_idle(c_idle.idle, cpu); + goto do_rest; + } + /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. + * During cold boot process, keventd thread is not spun up yet. + * When we do cpu hot-add, we create idle threads on the fly, we should + * not acquire any attributes from the calling context. 
Hence the clean + * way to create kernel_threads() is to do that from keventd(). + * We do the current_is_keventd() due to the fact that ACPI notifier + * was also queuing to keventd() and when the caller is already running + * in context of keventd(), we would end up with locking up the keventd + * thread. */ - idle = fork_idle(cpu); - if (IS_ERR(idle)) { + if (!keventd_up() || current_is_keventd()) + work.func(work.data); + else { + schedule_work(&work); + wait_for_completion(&c_idle.done); + } + + if (IS_ERR(c_idle.idle)) { printk("failed fork for CPU %d\n", cpu); - return PTR_ERR(idle); + return PTR_ERR(c_idle.idle); } - cpu_pda[cpu].pcurrent = idle; + set_idle_for_cpu(cpu, c_idle.idle); + +do_rest: + + cpu_pda[cpu].pcurrent = c_idle.idle; start_rip = setup_trampoline(); - init_rsp = idle->thread.rsp; + init_rsp = c_idle.idle->thread.rsp; per_cpu(init_tss,cpu).rsp0 = init_rsp; initial_code = start_secondary; - clear_ti_thread_flag(idle->thread_info, TIF_FORK); + clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK); printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid, start_rip, init_rsp); @@ -925,10 +1001,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int apicid = cpu_present_to_apicid(i); if (physid_isset(apicid, phys_cpu_present_map)) { cpu_set(i, cpu_present_map); - /* possible map would be different if we supported real - CPU hotplug. */ cpu_set(i, cpu_possible_map); } + fixup_cpu_possible_map(i); } if (smp_sanity_check(max_cpus) < 0) { @@ -977,9 +1052,6 @@ void __init smp_prepare_boot_cpu(void) /* * Entry point to boot a CPU. - * - * This is all __cpuinit, not __devinit for now because we don't support - * CPU hotplug (yet). */ int __cpuinit __cpu_up(unsigned int cpu) { @@ -996,6 +1068,14 @@ int __cpuinit __cpu_up(unsigned int cpu) return -EINVAL; } + /* + * Already booted CPU? + */ + if (cpu_isset(cpu, cpu_callin_map)) { + Dprintk("do_boot_cpu %d Already started\n", cpu); + return -ENOSYS; + } + /* Boot it! */ err = do_boot_cpu(cpu, apicid); if (err < 0) { @@ -1008,7 +1088,9 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); - return 0; + err = 0; + + return err; } /* @@ -1016,7 +1098,9 @@ int __cpuinit __cpu_up(unsigned int cpu) */ void __init smp_cpus_done(unsigned int max_cpus) { +#ifndef CONFIG_HOTPLUG_CPU zap_low_mappings(); +#endif smp_cleanup_boot(); #ifdef CONFIG_X86_IO_APIC @@ -1028,3 +1112,94 @@ void __init smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } + +#ifdef CONFIG_HOTPLUG_CPU + +static void +remove_siblinginfo(int cpu) +{ + int sibling; + + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[sibling]); + for_each_cpu_mask(sibling, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[sibling]); + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + phys_proc_id[cpu] = BAD_APICID; + cpu_core_id[cpu] = BAD_APICID; +} + +void remove_cpu_from_maps(void) +{ + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ +} + +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. 
+ * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + disable_APIC_timer(); + + /* + * HACK: + * Allow any queued timer interrupts to get serviced + * This is only a temporary solution until we cleanup + * fixup_irqs as we do for IA64. + */ + local_irq_enable(); + mdelay(1); + + local_irq_disable(); + remove_siblinginfo(cpu); + + /* It's now safe to remove this processor from the online map */ + cpu_clear(cpu, cpu_online_map); + remove_cpu_from_maps(); + fixup_irqs(cpu_online_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + /* We don't do anything here: idle task is faking death itself. */ + unsigned int i; + + for (i = 0; i < 10; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + return; + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} + +#else /* ... !CONFIG_HOTPLUG_CPU */ + +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 121646fc43f..10273663000 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -586,11 +586,17 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) asmlinkage void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; + int cpu; + + cpu = smp_processor_id(); /* Only the BSP gets external NMIs from the system. */ - if (!smp_processor_id()) + if (!cpu) reason = get_nmi_reason(); + if (!cpu_online(cpu)) + return; + if (!(reason & 0xc0)) { if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) == NOTIFY_STOP) diff --git a/include/asm-x86_64/irq.h b/include/asm-x86_64/irq.h index 3af50b3c3b0..eb3b7aa9eb9 100644 --- a/include/asm-x86_64/irq.h +++ b/include/asm-x86_64/irq.h @@ -52,4 +52,9 @@ struct irqaction; struct pt_regs; int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); +#ifdef CONFIG_HOTPLUG_CPU +#include +extern void fixup_irqs(cpumask_t map); +#endif + #endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index a7425aa5a3b..9c6242fb99d 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -77,6 +77,8 @@ extern __inline int hard_smp_processor_id(void) } extern int safe_smp_processor_id(void); +extern int __cpu_disable(void); +extern void __cpu_die(unsigned int cpu); #endif /* !ASSEMBLY */ -- cgit v1.2.3 From cb0cd8d49a9b81aff7a02e2ed826b5cfdfe9a172 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Jun 2005 14:55:01 -0700 Subject: [PATCH] x86_64: CPU hotplug sibling map cleanup This patch is a minor cleanup to the cpu sibling/core map. It is required that this setup happens on a per-cpu bringup time. 
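For illustration only (this helper is hypothetical and not in the patch, and would live next to the map definitions in smpboot.c), a consumer of the per-CPU topology maps can walk them directly once set_cpu_sibling_map() has run for a CPU at bringup time:

static void report_topology(int cpu)
{
	int i;

	/* HT siblings of this CPU */
	for_each_cpu_mask(i, cpu_sibling_map[cpu])
		if (i != cpu)
			printk(KERN_INFO "CPU%d: HT sibling CPU%d\n", cpu, i);

	/* other cores in the same physical package */
	for_each_cpu_mask(i, cpu_core_map[cpu])
		if (i != cpu)
			printk(KERN_INFO "CPU%d: same package as CPU%d\n",
			       cpu, i);
}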
Signed-off-by: Ashok Raj Acked-by: Andi Kleen Acked-by: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/smpboot.c | 84 +++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 5a3f955b657..571a55462fa 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -445,6 +445,33 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + if (smp_num_siblings > 1) { + for_each_cpu(i) { + if (cpu_core_id[cpu] == cpu_core_id[i]) { + cpu_set(i, cpu_sibling_map[cpu]); + cpu_set(cpu, cpu_sibling_map[i]); + } + } + } else { + cpu_set(cpu, cpu_sibling_map[cpu]); + } + + if (current_cpu_data.x86_num_cores > 1) { + for_each_cpu(i) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + } + } + } else { + cpu_core_map[cpu] = cpu_sibling_map[cpu]; + } +} + /* * Setup code on secondary processor (after comming out of the trampoline) */ @@ -474,6 +501,12 @@ void __cpuinit start_secondary(void) enable_APIC_timer(); + /* + * The sibling maps must be set before turing the online map on for + * this cpu + */ + set_cpu_sibling_map(smp_processor_id()); + /* * Allow the master to continue. */ @@ -816,51 +849,6 @@ do_rest: cycles_t cacheflush_time; unsigned long cache_decay_ticks; -/* - * Construct cpu_sibling_map[], so that we can tell the sibling CPU - * on SMT systems efficiently. - */ -static __cpuinit void detect_siblings(void) -{ - int cpu; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - } - - for_each_online_cpu (cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; - int siblings = 0; - int i; - if (smp_num_siblings > 1) { - for_each_online_cpu (i) { - if (cpu_core_id[cpu] == cpu_core_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) { - printk(KERN_WARNING - "WARNING: %d siblings found for CPU%d, should be %d\n", - siblings, cpu, smp_num_siblings); - smp_num_siblings = siblings; - } - if (c->x86_num_cores > 1) { - for_each_online_cpu(i) { - if (phys_proc_id[cpu] == phys_proc_id[i]) - cpu_set(i, cpu_core_map[cpu]); - } - } else - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - } -} - /* * Cleanup possible dangling ends... */ @@ -1048,6 +1036,8 @@ void __init smp_prepare_boot_cpu(void) int me = smp_processor_id(); cpu_set(me, cpu_online_map); cpu_set(me, cpu_callout_map); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); } /* @@ -1107,7 +1097,6 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif - detect_siblings(); time_init_gtod(); check_nmi_watchdog(); @@ -1115,8 +1104,7 @@ void __init smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_HOTPLUG_CPU -static void -remove_siblinginfo(int cpu) +static void remove_siblinginfo(int cpu) { int sibling; -- cgit v1.2.3 From 884d9e40b4089014f40c49e86ac6505842db2b53 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Jun 2005 14:55:02 -0700 Subject: [PATCH] x86_64: Dont use broadcast shortcut to make it cpu hotplug safe. Broadcast IPI's provide un-expected behaviour for cpu hotplug. CPU's in offline state also end up receiving the IPI. 
Once the cpus become online they receive these stale IPI's which are bad and introduce unexpected behaviour. This is easily avoided by not sending a broadcast and addressing just the CPU's in online map. Doing prelim cycle counts it appears there is no big overhead and numbers seem around 0x3000-0x3900 on an average on x86 and x86_64 systems with CPUS running 3G, both for broadcast and mask version of the API's. The shortcuts are useful only for flat mode (where the perf shows no degradation), and in cluster mode, its unicast anyway. Its simpler to just not use broadcast anymore. Signed-off-by: Ashok Raj Acked-by: Andi Kleen Acked-by: Zwane Mwaikambo Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/genapic_flat.c | 42 ++++++++++++++++++++++++--------------- arch/x86_64/kernel/smp.c | 10 ++++++++++ arch/x86_64/kernel/smpboot.c | 19 +++++++++++++++++- include/asm-x86_64/smp.h | 2 ++ 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index b4cbbad0422..00f3fa6df71 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -7,6 +7,8 @@ * Hacked for x86-64 by James Cleverdon from i386 architecture code by * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. + * Ashok Raj + * Removed IPI broadcast shortcut to support CPU hotplug */ #include #include @@ -45,22 +47,6 @@ static void flat_init_apic_ldr(void) apic_write_around(APIC_LDR, val); } -static void flat_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then - * we get an APIC send error if we try to broadcast. - * thus we have to avoid sending IPIs in this case. - */ - if (num_online_cpus() > 1) - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); -} - -static void flat_send_IPI_all(int vector) -{ - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); -} - static void flat_send_IPI_mask(cpumask_t cpumask, int vector) { unsigned long mask = cpus_addr(cpumask)[0]; @@ -93,6 +79,30 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) local_irq_restore(flags); } +static void flat_send_IPI_allbutself(int vector) +{ + cpumask_t mask; + /* + * if there are no other CPUs in the system then + * we get an APIC send error if we try to broadcast. + * thus we have to avoid sending IPIs in this case. + */ + int this_cpu = get_cpu(); + + mask = cpu_online_map; + cpu_clear(this_cpu, mask); + + if (cpus_weight(mask) >= 1) + flat_send_IPI_mask(mask, vector); + + put_cpu(); +} + +static void flat_send_IPI_all(int vector) +{ + flat_send_IPI_mask(cpu_online_map, vector); +} + static int flat_apic_id_registered(void) { return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 1e379ed17b1..ccae392886a 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -283,6 +283,16 @@ struct call_data_struct { static struct call_data_struct * call_data; +void lock_ipi_call_lock(void) +{ + spin_lock_irq(&call_lock); +} + +void unlock_ipi_call_lock(void) +{ + spin_unlock_irq(&call_lock); +} + /* * this function sends a 'generic call function' IPI to all other CPUs * in the system. 
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 571a55462fa..b969ee12872 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -507,10 +507,23 @@ void __cpuinit start_secondary(void) */ set_cpu_sibling_map(smp_processor_id()); + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI receipients, and the time when the determination is made + * for which cpus receive the IPI in genapic_flat.c. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + lock_ipi_call_lock(); + /* * Allow the master to continue. */ cpu_set(smp_processor_id(), cpu_online_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + unlock_ipi_call_lock(); + mb(); /* Wait for TSC sync to not schedule things before. @@ -1038,6 +1051,7 @@ void __init smp_prepare_boot_cpu(void) cpu_set(me, cpu_callout_map); cpu_set(0, cpu_sibling_map[0]); cpu_set(0, cpu_core_map[0]); + per_cpu(cpu_state, me) = CPU_ONLINE; } /* @@ -1066,6 +1080,7 @@ int __cpuinit __cpu_up(unsigned int cpu) return -ENOSYS; } + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Boot it! */ err = do_boot_cpu(cpu, apicid); if (err < 0) { @@ -1170,8 +1185,10 @@ void __cpu_die(unsigned int cpu) for (i = 0; i < 10; i++) { /* They ack this in play_dead by setting CPU_DEAD */ - if (per_cpu(cpu_state, cpu) == CPU_DEAD) + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + printk ("CPU %d is now offline\n", cpu); return; + } current->state = TASK_UNINTERRUPTIBLE; schedule_timeout(HZ/10); } diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 9c6242fb99d..aeb1b73e21e 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -43,6 +43,8 @@ extern cpumask_t cpu_callout_map; extern void smp_alloc_memory(void); extern volatile unsigned long smp_invalidate_needed; extern int pic_mode; +extern void lock_ipi_call_lock(void); +extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_flush_tlb(void); extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); -- cgit v1.2.3 From a02c4cb67e4ccd5ce7a13c7f04c2fedb06c35431 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Jun 2005 14:55:03 -0700 Subject: [PATCH] x86_64: Provide ability to choose using shortcuts for IPI in flat mode. This patch provides an option to switch broadcast or use mask version for sending IPI's. If CONFIG_HOTPLUG_CPU is defined, we choose not to use broadcast shortcuts by default, otherwise we choose broadcast mode as default. both cases, one can change this via startup cmd line option, to choose no-broadcast mode. no_ipi_broadcast=1 This is provided on request from Andi Kleen, since he doesnt agree with replacing IPI shortcuts as a solution for CPU hotplug. Without removing broadcast IPI's, it would mean lots of new code for __cpu_up() path, which would acheive the same results. 
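The boot-time switch follows the usual __setup()/get_option() pattern; a minimal sketch with a made-up "example_flag" parameter is shown below (illustration only — the patch itself registers "no_ipi_broadcast" the same way, so passing no_ipi_broadcast=1 on the kernel command line flips the default).

#include <linux/init.h>
#include <linux/kernel.h>

static int example_flag;	/* 0 unless overridden on the command line */

static int __init example_flag_setup(char *str)
{
	get_option(&str, &example_flag);	/* e.g. example_flag=1 */
	return 1;
}
__setup("example_flag", example_flag_setup);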
Signed-off-by: Ashok Raj Acked-by: Andi Kleen Acked-by: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/genapic_flat.c | 96 +++++++++++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 00f3fa6df71..28284696508 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -20,6 +20,46 @@ #include #include +/* + * The following permit choosing broadcast IPI shortcut v.s sending IPI only + * to online cpus via the send_IPI_mask varient. + * The mask version is my preferred option, since it eliminates a lot of + * other extra code that would need to be written to cleanup intrs sent + * to a CPU while offline. + * + * Sending broadcast introduces lots of trouble in CPU hotplug situations. + * These IPI's are delivered to cpu's irrespective of their offline status + * and could pickup stale intr data when these CPUS are turned online. + * + * Not using broadcast is a cleaner approach IMO, but Andi Kleen disagrees with + * the idea of not using broadcast IPI's anymore. Hence the run time check + * is introduced, on his request so we can choose an alternate mechanism. + * + * Initial wacky performance tests that collect cycle counts show + * no increase in using mask v.s broadcast version. In fact they seem + * identical in terms of cycle counts. + * + * if we need to use broadcast, we need to do the following. + * + * cli; + * hold call_lock; + * clear any pending IPI, just ack and clear all pending intr + * set cpu_online_map; + * release call_lock; + * sti; + * + * The complicated dummy irq processing shown above is not required if + * we didnt sent IPI's to wrong CPU's in the first place. + * + * - Ashok Raj + */ +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +static int no_broadcast=DEFAULT_SEND_IPI; static cpumask_t flat_target_cpus(void) { @@ -79,28 +119,37 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) local_irq_restore(flags); } -static void flat_send_IPI_allbutself(int vector) +static inline void __local_flat_send_IPI_allbutself(int vector) { - cpumask_t mask; - /* - * if there are no other CPUs in the system then - * we get an APIC send error if we try to broadcast. - * thus we have to avoid sending IPIs in this case. 
- */ - int this_cpu = get_cpu(); - - mask = cpu_online_map; - cpu_clear(this_cpu, mask); + if (no_broadcast) { + cpumask_t mask = cpu_online_map; + int this_cpu = get_cpu(); - if (cpus_weight(mask) >= 1) + cpu_clear(this_cpu, mask); flat_send_IPI_mask(mask, vector); + put_cpu(); + } + else + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); +} - put_cpu(); +static inline void __local_flat_send_IPI_all(int vector) +{ + if (no_broadcast) + flat_send_IPI_mask(cpu_online_map, vector); + else + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); +} + +static void flat_send_IPI_allbutself(int vector) +{ + if (((num_online_cpus()) - 1) >= 1) + __local_flat_send_IPI_allbutself(vector); } static void flat_send_IPI_all(int vector) { - flat_send_IPI_mask(cpu_online_map, vector); + __local_flat_send_IPI_all(vector); } static int flat_apic_id_registered(void) @@ -121,6 +170,16 @@ static unsigned int phys_pkg_id(int index_msb) return ((ebx >> 24) & 0xFF) >> index_msb; } +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : + "IPI Broadcast"); + return 1; +} + +__setup("no_ipi_broadcast", no_ipi_broadcast); + struct genapic apic_flat = { .name = "flat", .int_delivery_mode = dest_LowestPrio, @@ -135,3 +194,12 @@ struct genapic apic_flat = { .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, }; + +static int __init print_ipi_mode(void) +{ + printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : + "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); -- cgit v1.2.3 From a9fa06c26f7b7914c8cdf4d309b74df3151cc227 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 25 Jun 2005 14:55:05 -0700 Subject: [PATCH] set cpu_state for CPU hotplug (ia64) Dead CPU notifies online CPU that it's dead using cpu_state variable. After switching to physical cpu hotplug, we forgot setting the variable. This patch fixes it. Currently only __cpu_die uses it. We changed other locations for consistency in case others use it. Signed-off-by: Shaohua Li Acked-by: Ashok Raj Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/smpboot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index a888ddc10f7..623b0a54670 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -346,6 +346,7 @@ smp_callin (void) lock_ipi_calllock(); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); + per_cpu(cpu_state, cpuid) = CPU_ONLINE; smp_setup_percpu_timer(); @@ -611,6 +612,7 @@ void __devinit smp_prepare_boot_cpu(void) { cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callin_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } /* @@ -775,6 +777,7 @@ __cpu_up (unsigned int cpu) if (cpu_isset(cpu, cpu_callin_map)) return -EINVAL; + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Processor goes to start_secondary(), sets online flag */ ret = do_boot_cpu(sapicid, cpu); if (ret < 0) -- cgit v1.2.3 From fb69c3907ead36b9e9f41ea6f0d0e0ae10a38a47 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Sat, 25 Jun 2005 14:55:05 -0700 Subject: [PATCH] generate hotplug events for cpu online We already do kobject_hotplug for cpu offline; this adds a kobject_hotplug call for the online case. This is being requested by developers of an application which wants to be notified about both kinds of events. 
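An application could consume these events with a small /sbin/hotplug-style agent. The sketch below is illustrative only: it assumes the helper registered in /proc/sys/kernel/hotplug is invoked with the subsystem name ("cpu" here) as argv[1] and the ACTION/DEVPATH details in the environment, and the log file path is arbitrary.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
	const char *subsys = argc > 1 ? argv[1] : "";
	const char *action = getenv("ACTION");
	const char *devpath = getenv("DEVPATH");
	FILE *log;

	if (strcmp(subsys, "cpu") != 0 || !action || !devpath)
		return 0;	/* not a CPU event, ignore */

	log = fopen("/var/log/cpu-hotplug.log", "a");
	if (!log)
		return 1;
	fprintf(log, "%s: %s\n", devpath, action);	/* e.g. ".../cpu1: online" */
	fclose(log);
	return 0;
}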
Signed-off-by: Nathan Lynch Cc: Rusty Russell Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/cpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 0bf2dc11cdb..b79badd0f15 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -44,6 +44,8 @@ static ssize_t store_online(struct sys_device *dev, const char *buf, ret = smp_prepare_cpu(cpu->sysdev.id); if (!ret) ret = cpu_up(cpu->sysdev.id); + if (!ret) + kobject_hotplug(&dev->kobj, KOBJ_ONLINE); break; default: ret = -EINVAL; -- cgit v1.2.3 From 5a72e04df5470df0ec646029d31e5528167ab1a7 Mon Sep 17 00:00:00 2001 From: Li Shaohua Date: Sat, 25 Jun 2005 14:55:06 -0700 Subject: [PATCH] suspend/resume SMP support Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use disable/enable_nonboot_cpus API. The S4 part is based on Pavel's original S4 SMP patch. Signed-off-by: Li Shaohua Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/mcheck/k7.c | 2 +- arch/i386/kernel/cpu/mcheck/mce.c | 2 +- arch/i386/kernel/cpu/mcheck/p4.c | 4 +- arch/i386/kernel/cpu/mcheck/p6.c | 2 +- arch/i386/kernel/cpu/mcheck/winchip.c | 2 +- drivers/acpi/Kconfig | 2 +- include/linux/suspend.h | 2 +- kernel/power/Kconfig | 6 ++- kernel/power/Makefile | 6 +-- kernel/power/disk.c | 35 +++++++------- kernel/power/main.c | 16 ++++--- kernel/power/smp.c | 89 +++++++++++++---------------------- kernel/power/swsusp.c | 2 + 13 files changed, 80 insertions(+), 90 deletions(-) diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index 8df52e86c4d..c4abe765739 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c @@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) /* AMD K7 machine check is Intel like */ -void __init amd_mcheck_init(struct cpuinfo_x86 *c) +void __devinit amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 7218a7341fb..2cf25d2ba0f 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -16,7 +16,7 @@ #include "mce.h" -int mce_disabled __initdata = 0; +int mce_disabled __devinitdata = 0; int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 8b16ceb929b..0abccb6fdf9 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) } /* P4/Xeon Thermal regulation detect and init */ -static void __init intel_init_thermal(struct cpuinfo_x86 *c) +static void __devinit intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; unsigned int cpu = smp_processor_id(); @@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } -void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index 46640f8c249..f01b73f947e 100644 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } /* Set up machine check reporting for processors with Intel style MCE */ -void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c) +void 
__devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c index 753fa7acb98..7bae68fa168 100644 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ b/arch/i386/kernel/cpu/mcheck/winchip.c @@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting on the Winchip C6 series */ -void __init winchip_mcheck_init(struct cpuinfo_x86 *c) +void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; machine_check_vector = winchip_machine_check; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 670fdb5142d..86c52520ed3 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -55,7 +55,7 @@ if ACPI_INTERPRETER config ACPI_SLEEP bool "Sleep States (EXPERIMENTAL)" - depends on X86 + depends on X86 && (!SMP || SUSPEND_SMP) depends on EXPERIMENTAL && PM default y ---help--- diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2bf0d5fabcd..f2e96fdfaae 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -58,7 +58,7 @@ static inline int software_suspend(void) } #endif -#ifdef CONFIG_SMP +#ifdef CONFIG_SUSPEND_SMP extern void disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); #else diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 696387ffe49..fdb37763650 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -28,7 +28,7 @@ config PM_DEBUG config SOFTWARE_SUSPEND bool "Software Suspend (EXPERIMENTAL)" - depends on EXPERIMENTAL && PM && SWAP + depends on EXPERIMENTAL && PM && SWAP && (SUSPEND_SMP || !SMP) ---help--- Enable the possibility of suspending the machine. It doesn't need APM. @@ -72,3 +72,7 @@ config PM_STD_PARTITION suspended image to. It will simply pick the first available swap device. +config SUSPEND_SMP + bool + depends on HOTPLUG_CPU && X86 && PM + default y diff --git a/kernel/power/Makefile b/kernel/power/Makefile index fbdc634135a..2f438d0eaa1 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -3,9 +3,9 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -swsusp-smp-$(CONFIG_SMP) += smp.o - obj-y := main.o process.o console.o pm.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o + +obj-$(CONFIG_SUSPEND_SMP) += smp.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 02b6764034d..fb8de63c291 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -117,8 +117,8 @@ static void finish(void) { device_resume(); platform_finish(); - enable_nonboot_cpus(); thaw_processes(); + enable_nonboot_cpus(); pm_restore_console(); } @@ -131,28 +131,35 @@ static int prepare_processes(void) sys_sync(); + disable_nonboot_cpus(); + if (freeze_processes()) { error = -EBUSY; - return error; + goto thaw; } if (pm_disk_mode == PM_DISK_PLATFORM) { if (pm_ops && pm_ops->prepare) { if ((error = pm_ops->prepare(PM_SUSPEND_DISK))) - return error; + goto thaw; } } /* Free memory before shutting down devices. 
*/ free_some_memory(); - return 0; +thaw: + thaw_processes(); + enable_nonboot_cpus(); + pm_restore_console(); + return error; } static void unprepare_processes(void) { - enable_nonboot_cpus(); + platform_finish(); thaw_processes(); + enable_nonboot_cpus(); pm_restore_console(); } @@ -160,15 +167,9 @@ static int prepare_devices(void) { int error; - disable_nonboot_cpus(); - if ((error = device_suspend(PMSG_FREEZE))) { + if ((error = device_suspend(PMSG_FREEZE))) printk("Some devices failed to suspend\n"); - platform_finish(); - enable_nonboot_cpus(); - return error; - } - - return 0; + return error; } /** @@ -185,9 +186,9 @@ int pm_suspend_disk(void) int error; error = prepare_processes(); - if (!error) { - error = prepare_devices(); - } + if (error) + return error; + error = prepare_devices(); if (error) { unprepare_processes(); @@ -250,7 +251,7 @@ static int software_resume(void) if ((error = prepare_processes())) { swsusp_close(); - goto Cleanup; + goto Done; } pr_debug("PM: Reading swsusp image.\n"); diff --git a/kernel/power/main.c b/kernel/power/main.c index 4cdebc972ff..c94cb9e9509 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -55,6 +55,13 @@ static int suspend_prepare(suspend_state_t state) pm_prepare_console(); + disable_nonboot_cpus(); + + if (num_online_cpus() != 1) { + error = -EPERM; + goto Enable_cpu; + } + if (freeze_processes()) { error = -EAGAIN; goto Thaw; @@ -75,6 +82,8 @@ static int suspend_prepare(suspend_state_t state) pm_ops->finish(state); Thaw: thaw_processes(); + Enable_cpu: + enable_nonboot_cpus(); pm_restore_console(); return error; } @@ -113,6 +122,7 @@ static void suspend_finish(suspend_state_t state) if (pm_ops && pm_ops->finish) pm_ops->finish(state); thaw_processes(); + enable_nonboot_cpus(); pm_restore_console(); } @@ -150,12 +160,6 @@ static int enter_state(suspend_state_t state) goto Unlock; } - /* Suspend is hard to get right on SMP. */ - if (num_online_cpus() != 1) { - error = -EPERM; - goto Unlock; - } - pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); if ((error = suspend_prepare(state))) goto Unlock; diff --git a/kernel/power/smp.c b/kernel/power/smp.c index 457c2302ed4..bbe23079c62 100644 --- a/kernel/power/smp.c +++ b/kernel/power/smp.c @@ -13,73 +13,52 @@ #include #include #include +#include #include #include -static atomic_t cpu_counter, freeze; - - -static void smp_pause(void * data) -{ - struct saved_context ctxt; - __save_processor_state(&ctxt); - printk("Sleeping in:\n"); - dump_stack(); - atomic_inc(&cpu_counter); - while (atomic_read(&freeze)) { - /* FIXME: restore takes place at random piece inside this. - This should probably be written in assembly, and - preserve general-purpose registers, too - - What about stack? We may need to move to new stack here. - - This should better be ran with interrupts disabled. - */ - cpu_relax(); - barrier(); - } - atomic_dec(&cpu_counter); - __restore_processor_state(&ctxt); -} - -static cpumask_t oldmask; +/* This is protected by pm_sem semaphore */ +static cpumask_t frozen_cpus; void disable_nonboot_cpus(void) { - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(0)); - printk("Freezing CPUs (at %d)", raw_smp_processor_id()); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ); - printk("..."); - BUG_ON(raw_smp_processor_id() != 0); - - /* FIXME: for this to work, all the CPUs must be running - * "idle" thread (or we deadlock). Is that guaranteed? 
*/ + int cpu, error; - atomic_set(&cpu_counter, 0); - atomic_set(&freeze, 1); - smp_call_function(smp_pause, NULL, 0, 0); - while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) { - cpu_relax(); - barrier(); + error = 0; + cpus_clear(frozen_cpus); + printk("Freezing cpus ...\n"); + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + error = cpu_down(cpu); + if (!error) { + cpu_set(cpu, frozen_cpus); + printk("CPU%d is down\n", cpu); + continue; + } + printk("Error taking cpu %d down: %d\n", cpu, error); } - printk("ok\n"); + BUG_ON(smp_processor_id() != 0); + if (error) + panic("cpus not sleeping"); } void enable_nonboot_cpus(void) { - printk("Restarting CPUs"); - atomic_set(&freeze, 0); - while (atomic_read(&cpu_counter)) { - cpu_relax(); - barrier(); - } - printk("..."); - set_cpus_allowed(current, oldmask); - schedule(); - printk("ok\n"); + int cpu, error; + printk("Thawing cpus ...\n"); + for_each_cpu_mask(cpu, frozen_cpus) { + error = smp_prepare_cpu(cpu); + if (!error) + error = cpu_up(cpu); + if (!error) { + printk("CPU%d is up\n", cpu); + continue; + } + printk("Error taking cpu %d up: %d\n", cpu, error); + panic("Not enough cpus"); + } + cpus_clear(frozen_cpus); } - diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 53f9f8720ee..339b5c3735b 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1193,8 +1193,10 @@ static const char * sanity_check(void) return "version"; if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) return "machine"; +#if 0 if(swsusp_info.cpus != num_online_cpus()) return "number of cpus"; +#endif return NULL; } -- cgit v1.2.3 From fc5fb2c609c6acef15a8b062063e9135fb08b4d2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:07 -0700 Subject: [PATCH] swsusp: documentation updates This updates documentation and fixes pointers in MAINTAINERS file. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/swsusp.txt | 84 ++++++++++++++++++++++++++++++++++++++++-- MAINTAINERS | 4 +- 2 files changed, 82 insertions(+), 6 deletions(-) diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index c7c3459fde4..8363c51831d 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -164,11 +164,11 @@ place where the thread is safe to be frozen (no kernel semaphores should be held at that point and it must be safe to sleep there), and add: - if (current->flags & PF_FREEZE) - refrigerator(PF_FREEZE); + try_to_freeze(PF_FREEZE); If the thread is needed for writing the image to storage, you should -instead set the PF_NOFREEZE process flag when creating the thread. +instead set the PF_NOFREEZE process flag when creating the thread (and +be very carefull). Q: What is the difference between between "platform", "shutdown" and @@ -233,3 +233,81 @@ A: Try running cat `cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u` > /dev/null after resume. swapoff -a; swapon -a may also be usefull. + +Q: What happens to devices during swsusp? They seem to be resumed +during system suspend? + +A: That's correct. We need to resume them if we want to write image to +disk. 
Whole sequence goes like + + Suspend part + ~~~~~~~~~~~~ + running system, user asks for suspend-to-disk + + user processes are stopped + + suspend(PMSG_FREEZE): devices are frozen so that they don't interfere + with state snapshot + + state snapshot: copy of whole used memory is taken with interrupts disabled + + resume(): devices are woken up so that we can write image to swap + + write image to swap + + suspend(PMSG_SUSPEND): suspend devices so that we can power off + + turn the power off + + Resume part + ~~~~~~~~~~~ + (is actually pretty similar) + + running system, user asks for suspend-to-disk + + user processes are stopped (in common case there are none, but with resume-from-initrd, noone knows) + + read image from disk + + suspend(PMSG_FREEZE): devices are frozen so that they don't interfere + with image restoration + + image restoration: rewrite memory with image + + resume(): devices are woken up so that system can continue + + thaw all user processes + +Q: What is this 'Encrypt suspend image' for? + +A: First of all: it is not a replacement for dm-crypt encrypted swap. +It cannot protect your computer while it is suspended. Instead it does +protect from leaking sensitive data after resume from suspend. + +Think of the following: you suspend while an application is running +that keeps sensitive data in memory. The application itself prevents +the data from being swapped out. Suspend, however, must write these +data to swap to be able to resume later on. Without suspend encryption +your sensitive data are then stored in plaintext on disk. This means +that after resume your sensitive data are accessible to all +applications having direct access to the swap device which was used +for suspend. If you don't need swap after resume these data can remain +on disk virtually forever. Thus it can happen that your system gets +broken in weeks later and sensitive data which you thought were +encrypted and protected are retrieved and stolen from the swap device. +To prevent this situation you should use 'Encrypt suspend image'. + +During suspend a temporary key is created and this key is used to +encrypt the data written to disk. When, during resume, the data was +read back into memory the temporary key is destroyed which simply +means that all data written to disk during suspend are then +inaccessible so they can't be stolen later on. The only thing that +you must then take care of is that you call 'mkswap' for the swap +partition used for suspend as early as possible during regular +boot. This asserts that any temporary key from an oopsed suspend or +from a failed or aborted resume is erased from the swap device. + +As a rule of thumb use encrypted swap to protect your data while your +system is shut down or suspended. Additionally use the encrypted +suspend image to prevent sensitive data from being stolen after +resume. diff --git a/MAINTAINERS b/MAINTAINERS index a07eeb41137..dbdd8494b2e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2115,9 +2115,7 @@ S: Maintained SOFTWARE SUSPEND: P: Pavel Machek M: pavel@suse.cz -M: pavel@ucw.cz -L: http://lister.fornax.hu/mailman/listinfo/swsusp -W: http://swsusp.sf.net/ +L: linux-pm@osdl.org S: Maintained SONIC NETWORK DRIVER -- cgit v1.2.3 From 648be3188135add682349e86d46d07cc11c8eb57 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:09 -0700 Subject: [PATCH] swsusp: kill config_pm_disk CONFIG_PM_DISK is long gone, but it still managed to survived at few places. 
Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/defconfig | 1 - arch/i386/mm/init.c | 2 +- mm/page_io.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 28e62038379..ca07b95c06b 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -126,7 +126,6 @@ CONFIG_HAVE_DEC_LOCK=y # CONFIG_PM=y CONFIG_SOFTWARE_SUSPEND=y -# CONFIG_PM_DISK is not set # # ACPI (Advanced Configuration and Power Interface) Support diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 3672e2ef51a..12216b52e28 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -352,7 +352,7 @@ static void __init pagetable_init (void) #endif } -#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND) +#ifdef CONFIG_SOFTWARE_SUSPEND /* * Swap suspend & friends need this for resume because things like the intel-agp * driver might have split up a kernel 4MB mapping. diff --git a/mm/page_io.c b/mm/page_io.c index 667c76df1ec..2e605a19ce5 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -127,7 +127,7 @@ out: return ret; } -#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_PM_DISK) +#ifdef CONFIG_SOFTWARE_SUSPEND /* * A scruffy utility function to read or write an arbitrary swap page * and wait on the I/O. The caller must have a ref on the page. -- cgit v1.2.3 From 343c3f642898cb2cb5c3e4f948e3e0a1bbc0351b Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:09 -0700 Subject: [PATCH] s-t-RAM: load gdt the right way Sleep code uses wrong version of lgdt, that does the wrong thing when gdt is beyond 16MB or so. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/acpi/wakeup.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S index a4c630034cd..185faa911db 100644 --- a/arch/x86_64/kernel/acpi/wakeup.S +++ b/arch/x86_64/kernel/acpi/wakeup.S @@ -67,7 +67,7 @@ wakeup_code: shll $4, %eax addl $(gdta - wakeup_code), %eax movl %eax, gdt_48a +2 - wakeup_code - lgdt %ds:gdt_48a - wakeup_code # load gdt with whatever is + lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is # appropriate movl $1, %eax # protected mode (PE) bit -- cgit v1.2.3 From 5ce47e59c9688d8480ae41100117d8188c191401 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:10 -0700 Subject: [PATCH] acpi: fix video docs This fixes typos/formatting in video_extension.txt. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/video_extension.txt | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Documentation/power/video_extension.txt b/Documentation/power/video_extension.txt index 8e33d7c82c4..b2f9b1598ac 100644 --- a/Documentation/power/video_extension.txt +++ b/Documentation/power/video_extension.txt @@ -1,13 +1,16 @@ -This driver implement the ACPI Extensions For Display Adapters -for integrated graphics devices on motherboard, as specified in -ACPI 2.0 Specification, Appendix B, allowing to perform some basic -control like defining the video POST device, retrieving EDID information -or to setup a video output, etc. Note that this is an ref. implementation only. -It may or may not work for your integrated video device. 
+ACPI video extensions +~~~~~~~~~~~~~~~~~~~~~ + +This driver implement the ACPI Extensions For Display Adapters for +integrated graphics devices on motherboard, as specified in ACPI 2.0 +Specification, Appendix B, allowing to perform some basic control like +defining the video POST device, retrieving EDID information or to +setup a video output, etc. Note that this is an ref. implementation +only. It may or may not work for your integrated video device. Interfaces exposed to userland through /proc/acpi/video: -VGA/info : display the supported video bus device capability like ,Video ROM, CRT/LCD/TV. +VGA/info : display the supported video bus device capability like Video ROM, CRT/LCD/TV. VGA/ROM : Used to get a copy of the display devices' ROM data (up to 4k). VGA/POST_info : Used to determine what options are implemented. VGA/POST : Used to get/set POST device. @@ -15,7 +18,7 @@ VGA/DOS : Used to get/set ownership of output switching: Please refer ACPI spec B.4.1 _DOS VGA/CRT : CRT output VGA/LCD : LCD output -VGA/TV : TV output +VGA/TVO : TV output VGA/*/brightness : Used to get/set brightness of output device Notify event through /proc/acpi/event: -- cgit v1.2.3 From 620b03276488c3cf103caf1e326bd21f00d3df84 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:11 -0700 Subject: [PATCH] properly stop devices before poweroff Without this patch, Linux provokes emergency disk shutdowns and similar nastiness. It was in SuSE kernels for some time, IIRC. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pm.h | 33 +++++++++++++++++++++------------ kernel/sys.c | 3 +++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index ed2b76e7519..14479325e3f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -103,7 +103,8 @@ extern int pm_active; /* * Register a device with power management */ -struct pm_dev __deprecated *pm_register(pm_dev_t type, unsigned long id, pm_callback callback); +struct pm_dev __deprecated * +pm_register(pm_dev_t type, unsigned long id, pm_callback callback); /* * Unregister a device with power management @@ -190,17 +191,18 @@ typedef u32 __bitwise pm_message_t; /* * There are 4 important states driver can be in: * ON -- driver is working - * FREEZE -- stop operations and apply whatever policy is applicable to a suspended driver - * of that class, freeze queues for block like IDE does, drop packets for - * ethernet, etc... stop DMA engine too etc... so a consistent image can be - * saved; but do not power any hardware down. - * SUSPEND - like FREEZE, but hardware is doing as much powersaving as possible. Roughly - * pci D3. + * FREEZE -- stop operations and apply whatever policy is applicable to a + * suspended driver of that class, freeze queues for block like IDE + * does, drop packets for ethernet, etc... stop DMA engine too etc... + * so a consistent image can be saved; but do not power any hardware + * down. + * SUSPEND - like FREEZE, but hardware is doing as much powersaving as + * possible. Roughly pci D3. * - * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 (SUSPEND). - * We'll need to fix the drivers. So yes, putting 3 to all diferent defines is intentional, - * and will go away as soon as drivers are fixed. Also note that typedef is neccessary, - * we'll probably want to switch to + * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 + * (SUSPEND). We'll need to fix the drivers. 
So yes, putting 3 to all different + * defines is intentional, and will go away as soon as drivers are fixed. Also + * note that typedef is neccessary, we'll probably want to switch to * typedef struct pm_message_t { int event; int flags; } pm_message_t * or something similar soon. */ @@ -222,11 +224,18 @@ struct dev_pm_info { extern void device_pm_set_parent(struct device * dev, struct device * parent); -extern int device_suspend(pm_message_t state); extern int device_power_down(pm_message_t state); extern void device_power_up(void); extern void device_resume(void); +#ifdef CONFIG_PM +extern int device_suspend(pm_message_t state); +#else +static inline int device_suspend(pm_message_t state) +{ + return 0; +} +#endif #endif /* __KERNEL__ */ diff --git a/kernel/sys.c b/kernel/sys.c index da24bc1292d..dac10161ca2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -405,6 +405,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user case LINUX_REBOOT_CMD_HALT: notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); system_state = SYSTEM_HALT; + device_suspend(PMSG_SUSPEND); device_shutdown(); printk(KERN_EMERG "System halted.\n"); machine_halt(); @@ -415,6 +416,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user case LINUX_REBOOT_CMD_POWER_OFF: notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); system_state = SYSTEM_POWER_OFF; + device_suspend(PMSG_SUSPEND); device_shutdown(); printk(KERN_EMERG "Power down.\n"); machine_power_off(); @@ -431,6 +433,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer); system_state = SYSTEM_RESTART; + device_suspend(PMSG_FREEZE); device_shutdown(); printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer); machine_restart(buffer); -- cgit v1.2.3 From 8f9bdf15c059c5d84db9c395705bf79b30762420 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:12 -0700 Subject: [PATCH] swsusp: kill unneccessary does_collide_order The following patch removes the unnecessary function does_collide_order(). This function is no longer necessary, as currently there are only 0-order allocations in swsusp, and the use of it is confusing. Signed-off-by: Rafael J. Wysocki Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 339b5c3735b..7747a8c43e8 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -929,21 +929,6 @@ int swsusp_resume(void) return error; } -/* More restore stuff */ - -/* - * Returns true if given address/order collides with any orig_address - */ -static int does_collide_order(unsigned long addr, int order) -{ - int i; - - for (i=0; i < (1< Date: Sat, 25 Jun 2005 14:55:12 -0700 Subject: [PATCH] swsusp: cleanup whitespace The following patch cleans up whitespace in swsusp.c (a bit): - removes any trailing whitespace - adds spaces after if, for, for_each_pbe, for_each_zone etc., wherever necessary. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 66 +++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 7747a8c43e8..9a3ca659a43 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -10,12 +10,12 @@ * This file is released under the GPLv2. * * I'd like to thank the following people for their work: - * + * * Pavel Machek : * Modifications, defectiveness pointing, being with me at the very beginning, * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. * - * Steve Doddi : + * Steve Doddi : * Support the possibility of hardware state restoring. * * Raph : @@ -84,11 +84,11 @@ extern char resume_file[]; static unsigned int nr_copy_pages __nosavedata = 0; /* Suspend pagedir is allocated before final copy, therefore it - must be freed after resume + must be freed after resume Warning: this is evil. There are actually two pagedirs at time of resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", + time of suspend, that must be freed. Second is "pagedir_nosave", allocated at time of resume, that travels through memory not to collide with anything. @@ -132,7 +132,7 @@ static int mark_swapfiles(swp_entry_t prev) { int error; - rw_swap_page_sync(READ, + rw_swap_page_sync(READ, swp_entry(root_swap, 0), virt_to_page((unsigned long)&swsusp_header)); if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || @@ -140,7 +140,7 @@ static int mark_swapfiles(swp_entry_t prev) memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); swsusp_header.swsusp_info = prev; - error = rw_swap_page_sync(WRITE, + error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), virt_to_page((unsigned long) &swsusp_header)); @@ -174,22 +174,22 @@ static int is_resume_device(const struct swap_info_struct *swap_info) static int swsusp_swap_check(void) /* This is called before saving image */ { int i, len; - + len=strlen(resume_file); root_swap = 0xFFFF; - + swap_list_lock(); - for(i=0; iaddress, &(p->swap_address)))) @@ -335,7 +335,7 @@ static int close_swap(void) dump_info(); error = write_page((unsigned long)&swsusp_info, &entry); - if (!error) { + if (!error) { printk( "S" ); error = mark_swapfiles(entry); printk( "|\n" ); @@ -370,7 +370,7 @@ static int write_pagedir(void) struct pbe * pbe; printk( "Writing pagedir..."); - for_each_pb_page(pbe, pagedir_nosave) { + for_each_pb_page (pbe, pagedir_nosave) { if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) return error; } @@ -472,7 +472,7 @@ static int save_highmem(void) int res = 0; pr_debug("swsusp: Saving Highmem\n"); - for_each_zone(zone) { + for_each_zone (zone) { if (is_highmem(zone)) res = save_highmem_zone(zone); if (res) @@ -547,7 +547,7 @@ static void count_data_pages(void) nr_copy_pages = 0; - for_each_zone(zone) { + for_each_zone (zone) { if (is_highmem(zone)) continue; mark_free_pages(zone); @@ -562,9 +562,9 @@ static void copy_data_pages(void) struct zone *zone; unsigned long zone_pfn; struct pbe * pbe = pagedir_nosave; - + pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages); - for_each_zone(zone) { + for_each_zone (zone) { if (is_highmem(zone)) continue; mark_free_pages(zone); @@ -702,7 +702,7 @@ static void free_image_pages(void) { struct pbe * p; - for_each_pbe(p, pagedir_save) { + for_each_pbe (p, 
pagedir_save) { if (p->address) { ClearPageNosave(virt_to_page(p->address)); free_page(p->address); @@ -719,7 +719,7 @@ static int alloc_image_pages(void) { struct pbe * p; - for_each_pbe(p, pagedir_save) { + for_each_pbe (p, pagedir_save) { p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); if (!p->address) return -ENOMEM; @@ -740,7 +740,7 @@ void swsusp_free(void) /** * enough_free_mem - Make sure we enough free memory to snapshot. * - * Returns TRUE or FALSE after checking the number of available + * Returns TRUE or FALSE after checking the number of available * free pages. */ @@ -758,11 +758,11 @@ static int enough_free_mem(void) /** * enough_swap - Make sure we have enough swap to save the image. * - * Returns TRUE or FALSE after checking the total amount of swap + * Returns TRUE or FALSE after checking the total amount of swap * space avaiable. * * FIXME: si_swapinfo(&i) returns all swap devices information. - * We should only consider resume_device. + * We should only consider resume_device. */ static int enough_swap(void) @@ -827,8 +827,8 @@ static int suspend_prepare_image(void) error = swsusp_alloc(); if (error) return error; - - /* During allocating of suspend pagedir, new cold pages may appear. + + /* During allocating of suspend pagedir, new cold pages may appear. * Kill them. */ drain_local_pages(); @@ -1030,7 +1030,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) /* Set page flags */ - for_each_zone(zone) { + for_each_zone (zone) { for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) SetPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); -- cgit v1.2.3 From c61978b30322c83a94d7e4857fa5b9996b7d7931 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:14 -0700 Subject: [PATCH] swsusp: fix nr_copy_pages The following patch moves the recalculation of nr_copy_pages so that the right number is used in the calculation of the size of memory and swap needed. It prevents swsusp from attempting to suspend if there is not enough memory and/or swap (which is unlikely anyway). Signed-off-by: Rafael J. Wysocki Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 9a3ca659a43..c285fc5a232 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -781,18 +781,18 @@ static int swsusp_alloc(void) { int error; + pagedir_nosave = NULL; + nr_copy_pages = calc_nr(nr_copy_pages); + pr_debug("suspend: (pages needed: %d + %d free: %d)\n", nr_copy_pages, PAGES_FOR_IO, nr_free_pages()); - pagedir_nosave = NULL; if (!enough_free_mem()) return -ENOMEM; if (!enough_swap()) return -ENOSPC; - nr_copy_pages = calc_nr(nr_copy_pages); - if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) { printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); return -ENOMEM; -- cgit v1.2.3 From 8d783b3e02002bce8cf9d4e4a82922ee7e59b1e5 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:14 -0700 Subject: [PATCH] swsusp: clean assembly parts This patch fixes register saving so that each register is only saved once, and adds missing saving of %cr8 on x86-64. Some reordering so that save/restore is more logical/safer (segment registers should be restored after gdt). 
Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/power/cpu.c | 17 +++++++---------- arch/x86_64/kernel/suspend.c | 18 +++++++++--------- include/asm-x86_64/suspend.h | 2 +- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index d099d01461f..0e6b45b6125 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -44,7 +44,6 @@ void __save_processor_state(struct saved_context *ctxt) */ asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); - asm volatile ("sldt %0" : "=m" (ctxt->ldt)); asm volatile ("str %0" : "=m" (ctxt->tr)); /* @@ -107,7 +106,6 @@ static void fix_processor_context(void) void __restore_processor_state(struct saved_context *ctxt) { - /* * control registers */ @@ -116,6 +114,13 @@ void __restore_processor_state(struct saved_context *ctxt) asm volatile ("movl %0, %%cr2" :: "r" (ctxt->cr2)); asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0)); + /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). + */ + asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); + asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); + /* * segment registers */ @@ -124,14 +129,6 @@ void __restore_processor_state(struct saved_context *ctxt) asm volatile ("movw %0, %%gs" :: "r" (ctxt->gs)); asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). - */ - asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); - asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); - asm volatile ("lldt %0" :: "m" (ctxt->ldt)); - /* * sysenter MSRs */ diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index ebaa1e37d65..6c0f402e3a8 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -44,7 +44,6 @@ void __save_processor_state(struct saved_context *ctxt) */ asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); - asm volatile ("sldt %0" : "=m" (ctxt->ldt)); asm volatile ("str %0" : "=m" (ctxt->tr)); /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ @@ -69,6 +68,7 @@ void __save_processor_state(struct saved_context *ctxt) asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2)); asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3)); asm volatile ("movq %%cr4, %0" : "=r" (ctxt->cr4)); + asm volatile ("movq %%cr8, %0" : "=r" (ctxt->cr8)); } void save_processor_state(void) @@ -90,11 +90,19 @@ void __restore_processor_state(struct saved_context *ctxt) /* * control registers */ + asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8)); asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4)); asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3)); asm volatile ("movq %0, %%cr2" :: "r" (ctxt->cr2)); asm volatile ("movq %0, %%cr0" :: "r" (ctxt->cr0)); + /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). + */ + asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); + asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); + /* * segment registers */ @@ -108,14 +116,6 @@ void __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). 
- */ - asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); - asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); - asm volatile ("lldt %0" :: "m" (ctxt->ldt)); - fix_processor_context(); do_fpu_end(); diff --git a/include/asm-x86_64/suspend.h b/include/asm-x86_64/suspend.h index ec745807fea..bb9f40597d0 100644 --- a/include/asm-x86_64/suspend.h +++ b/include/asm-x86_64/suspend.h @@ -16,7 +16,7 @@ arch_prepare_suspend(void) struct saved_context { u16 ds, es, fs, gs, ss; unsigned long gs_base, gs_kernel_base, fs_base; - unsigned long cr0, cr2, cr3, cr4; + unsigned long cr0, cr2, cr3, cr4, cr8; u16 gdt_pad; u16 gdt_limit; unsigned long gdt_base; -- cgit v1.2.3 From ac25575203c11145066ea5cb583354cb5f0a8ade Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 25 Jun 2005 14:55:15 -0700 Subject: [PATCH] CPU hotplug printk fix In the cpu hotplug case, per-cpu data possibly isn't initialized even the system state is 'running'. As the comments say in the original code, some console drivers assume per-cpu resources have been allocated. radeon fb is one such driver, which uses kmalloc. After a CPU is down, the per-cpu data of slab is freed, so the system crashed when printing some info. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/printk.c b/kernel/printk.c index 3a442bfb8be..5092397fac2 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -588,8 +588,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) log_level_unknown = 1; } - if (!cpu_online(smp_processor_id()) && - system_state != SYSTEM_RUNNING) { + if (!cpu_online(smp_processor_id())) { /* * Some console drivers may assume that per-cpu resources have * been allocated. So don't allow them to be called by this -- cgit v1.2.3 From 21d6b7e18f70c847c867aafb3109405b48424388 Mon Sep 17 00:00:00 2001 From: "pavel@ucw.cz" Date: Sat, 25 Jun 2005 14:55:16 -0700 Subject: [PATCH] suspend: PCI power managment reference implementation Added reference implementation of suspend and resume routines. From: Shaohua Li build fix Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/pci.txt | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index 35b1a7dae34..6fc9d511fc3 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -291,6 +291,44 @@ a request to enable wake events from D3, two calls should be made to pci_enable_wake (one for both D3hot and D3cold). +A reference implementation +------------------------- +.suspend() +{ + /* driver specific operations */ + + /* Disable IRQ */ + free_irq(); + /* If using MSI */ + pci_disable_msi(); + + pci_save_state(); + pci_enable_wake(); + /* Disable IO/bus master/irq router */ + pci_disable_device(); + pci_set_power_state(pci_choose_state()); +} + +.resume() +{ + pci_set_power_state(PCI_D0); + pci_restore_state(); + /* device's irq possibly is changed, driver should take care */ + pci_enable_device(); + pci_set_master(); + + /* if using MSI, device's vector possibly is changed */ + pci_enable_msi(); + + request_irq(); + /* driver specific operations; */ +} + +This is a typical implementation. Drivers can slightly change the order +of the operations in the implementation, ignore some operations or add +more deriver specific operations in it, but drivers should do something like +this on the whole. + 5. 
Resources ~~~~~~~~~~~~ -- cgit v1.2.3 From 19c324397a55edf122822f829779b46b9cb385dd Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:17 -0700 Subject: [PATCH] swsusp: only allow it when it makes sense Show swsuspend only on .config where it can compile. I got this on PPC32 && SMP: kernel/power/smp.c:24: error: storage size of `ctxt' isn't known Also mark swsusp as no longer experimental. Signed-off-by: Olaf Hering Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index fdb37763650..2c7121d9bff 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -27,8 +27,8 @@ config PM_DEBUG like suspend support. config SOFTWARE_SUSPEND - bool "Software Suspend (EXPERIMENTAL)" - depends on EXPERIMENTAL && PM && SWAP && (SUSPEND_SMP || !SMP) + bool "Software Suspend" + depends on EXPERIMENTAL && PM && SWAP && ((X86 && SMP) || ((FVR || PPC32 || X86) && !SMP)) ---help--- Enable the possibility of suspending the machine. It doesn't need APM. -- cgit v1.2.3 From 2a96206559a8c3a18478a6288ac426d716bbd99c Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 25 Jun 2005 14:55:18 -0700 Subject: [PATCH] Update video-after-suspend documentation Update video-after-suspend documentation; few more machines are added. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/video.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt index 68734355d7c..881a37e3eeb 100644 --- a/Documentation/power/video.txt +++ b/Documentation/power/video.txt @@ -83,8 +83,10 @@ Compaq Armada E500 - P3-700 none (1) (S1 also works OK) Compaq Evo N620c vga=normal, s3_bios (2) Dell 600m, ATI R250 Lf none (1), but needs xorg-x11-6.8.1.902-1 Dell D600, ATI RV250 vga=normal and X, or try vbestate (6) +Dell D610 vga=normal and X (possibly vbestate (6) too, but not tested) Dell Inspiron 4000 ??? (*) Dell Inspiron 500m ??? (*) +Dell Inspiron 510m ??? Dell Inspiron 600m ??? (*) Dell Inspiron 8200 ??? (*) Dell Inspiron 8500 ??? (*) @@ -123,6 +125,7 @@ Toshiba Satellite 4030CDT s3_mode (3) Toshiba Satellite 4080XCDT s3_mode (3) Toshiba Satellite 4090XCDT ??? (*) Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****) +Toshiba M30 (2) xor X with nvidia driver using internal AGP Uniwill 244IIO ??? (*) -- cgit v1.2.3 From 350d5bd84ecda038bb482ab5f2596bdca68109d9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Jun 2005 14:55:19 -0700 Subject: [PATCH] uml: fix sizeof usage Size of pointer doesn't seem right, but maybe my solution isn't either (sig_size maybe?). 
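(Aside, not UML code: the pitfall being fixed is the classic sizeof-of-a-pointer mistake. sizeof(&set.sig[0]) measures a pointer, while sizeof(set.sig[0]) measures one sigset word; on the 32-bit UML target the two happen to be equal, which is why the old expression worked in practice, but the element size is what the copy_from_user() call actually means. A standalone illustration:)

#include <stdio.h>

int main(void)
{
        unsigned long sig[2];

        /* pointer size and element size merely happen to coincide here */
        printf("sizeof(sig[0])  = %zu\n", sizeof(sig[0]));
        printf("sizeof(&sig[0]) = %zu\n", sizeof(&sig[0]));
        return 0;
}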
Signed-off-by: Domen Puncer Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/sys-i386/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 03913ca5d25..4efc69a039d 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -312,7 +312,7 @@ long sys_sigreturn(struct pt_regs regs) unsigned long __user *extramask = frame->extramask; int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - if(copy_from_user(&set.sig[0], oldmask, sizeof(&set.sig[0])) || + if(copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || copy_from_user(&set.sig[1], extramask, sig_size)) goto segfault; -- cgit v1.2.3 From 41f2148a67f28803d64bf5ff538591af90a5ab57 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 25 Jun 2005 14:55:20 -0700 Subject: [PATCH] uml: kfree cleanup Here's a small patch to remove a few unnessesary NULL pointer checks before kfree() in arch/um/drivers/daemon_user.c Signed-off-by: Jesper Juhl Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/daemon_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c index cf15b4a8b51..c1b03f7c1da 100644 --- a/arch/um/drivers/daemon_user.c +++ b/arch/um/drivers/daemon_user.c @@ -157,9 +157,9 @@ static void daemon_remove(void *data) os_close_file(pri->fd); os_close_file(pri->control); - if(pri->data_addr != NULL) kfree(pri->data_addr); - if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); - if(pri->local_addr != NULL) kfree(pri->local_addr); + kfree(pri->data_addr); + kfree(pri->ctl_addr); + kfree(pri->local_addr); } int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) -- cgit v1.2.3 From e0877f07e85a46e4fde32bd84f08551d360839fe Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:21 -0700 Subject: [PATCH] uml: fork cleanup Fix the do_fork calling convention: normal arch pass the regs and the new sp value to do_fork instead of NULL. Currently the arch-independent code ignores these values, while the UML code (actually it's copy_thread) gets the right values by itself. With this patch, things are fixed up. 
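(For reference only, not part of this patch: the "normal arch" convention referred to above is roughly what i386 did at the time -- the trap frame and the new stack pointer are handed straight to do_fork(). Sketch from memory:)

asmlinkage int sys_fork(struct pt_regs regs)
{
        return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
}

asmlinkage int sys_vfork(struct pt_regs regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp,
                       &regs, 0, NULL, NULL);
}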
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/process_kern.c | 4 ++-- arch/um/kernel/skas/process_kern.c | 14 +------------- arch/um/kernel/syscall_kern.c | 19 +++++--------------- arch/um/kernel/tt/process_kern.c | 17 +++--------------- arch/um/sys-i386/syscalls.c | 23 ++++------------------- arch/um/sys-x86_64/syscalls.c | 23 ++++------------------- 6 files changed, 19 insertions(+), 81 deletions(-) diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 157584ae479..0c57bc6e374 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -96,8 +96,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) current->thread.request.u.thread.proc = fn; current->thread.request.u.thread.arg = arg; - pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL, - NULL); + pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, + &current->thread.regs, 0, NULL, NULL); if(pid < 0) panic("do_fork failed in kernel_thread, errno = %d", pid); return(pid); diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index fc71ef29578..8f0607f54b7 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -111,8 +111,7 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, void (*handler)(int); if(current->thread.forking){ - memcpy(&p->thread.regs.regs.skas, - &current->thread.regs.regs.skas, + memcpy(&p->thread.regs.regs.skas, &regs->regs.skas, sizeof(p->thread.regs.regs.skas)); REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.skas.regs, 0); if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp; @@ -201,14 +200,3 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c index b7a55251e89..8e1a3501ff4 100644 --- a/arch/um/kernel/syscall_kern.c +++ b/arch/um/kernel/syscall_kern.c @@ -31,7 +31,8 @@ long sys_fork(void) long ret; current->thread.forking = 1; - ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); + ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs), + &current->thread.regs, 0, NULL, NULL); current->thread.forking = 0; return(ret); } @@ -41,8 +42,9 @@ long sys_vfork(void) long ret; current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, - NULL); + ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + UPT_SP(&current->thread.regs.regs), + &current->thread.regs, 0, NULL, NULL); current->thread.forking = 0; return(ret); } @@ -162,14 +164,3 @@ int next_syscall_index(int limit) spin_unlock(&syscall_lock); return(ret); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file.
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index 776310fd5b8..a189a2b9293 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -266,10 +266,10 @@ int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, } if(current->thread.forking){ - sc_to_sc(UPT_SC(&p->thread.regs.regs), - UPT_SC(&current->thread.regs.regs)); + sc_to_sc(UPT_SC(&p->thread.regs.regs), UPT_SC(&regs->regs)); SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0); - if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; + if(sp != 0) + SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; } p->thread.mode.tt.extern_pid = new_pid; @@ -459,14 +459,3 @@ int is_valid_pid(int pid) read_unlock(&tasklist_lock); return(0); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index 335e2d89504..83e9be820a8 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -69,15 +69,11 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp, { long ret; - /* XXX: normal arch do here this pass, and also pass the regs to - * do_fork, instead of NULL. Currently the arch-independent code - * ignores these values, while the UML code (actually it's - * copy_thread) does the right thing. But this should change, - probably. */ - /*if (!newsp) - newsp = UPT_SP(current->thread.regs);*/ + if (!newsp) + newsp = UPT_SP(&current->thread.regs.regs); current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); + ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid, + child_tid); current->thread.forking = 0; return(ret); } @@ -197,14 +193,3 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act, return ret; } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c index 6f44f40204e..3259a4db453 100644 --- a/arch/um/sys-x86_64/syscalls.c +++ b/arch/um/sys-x86_64/syscalls.c @@ -174,26 +174,11 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp, { long ret; - /* XXX: normal arch do here this pass, and also pass the regs to - * do_fork, instead of NULL. Currently the arch-independent code - * ignores these values, while the UML code (actually it's - * copy_thread) does the right thing. But this should change, - probably.
*/ - /*if (!newsp) - newsp = UPT_SP(current->thread.regs);*/ + if (!newsp) + newsp = UPT_SP(&current->thread.regs.regs); current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); + ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid, + child_tid); current->thread.forking = 0; return(ret); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From a6f4e3cf75538a75ee4fab620e3c8466f2154458 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:22 -0700 Subject: [PATCH] uml: fix timer initialization In skas mode, the call to uml_idle_timer permanently shut off the virtual timer, resulting in no timer ticks to anything but the idle thread. This is likely the cause of the soft lockups that are seen sporadically in recent UMLs. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/process_kern.c | 2 +- arch/um/kernel/skas/process_kern.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 0c57bc6e374..d4036ed680b 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -169,7 +169,7 @@ int current_pid(void) void default_idle(void) { - uml_idle_timer(); + CHOOSE_MODE(uml_idle_timer(), (void) 0); atomic_inc(&init_mm.mm_count); current->mm = &init_mm; diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 8f0607f54b7..0a7b8aa55db 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -180,7 +180,6 @@ int start_uml_skas(void) start_userspace(0); init_new_thread_signals(1); - uml_idle_timer(); init_task.thread.request.u.thread.proc = start_kernel_proc; init_task.thread.request.u.thread.arg = NULL; -- cgit v1.2.3 From 026549d28469f7d4ca7e5a4707f0d2dc4f2c164c Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:23 -0700 Subject: [PATCH] uml: always disable kmalloc during shutdown kmalloc wasn't being disabled during panic. This patch ensures that, no matter how UML is exiting, it is disabled. This matters because part of the cleanup is to remove the umid file, which involves readdir, which calls malloc. This must map to libc malloc, rather than kmalloc or vmalloc.
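(Illustrative sketch only -- the exact symbols are from memory and may not match UML's user-side glue. The point of keeping kmalloc_ok accurate is that allocations made from user-level code can be routed to the kernel allocator only while that is still safe, and must fall back to plain libc malloc() once the kernel side is being torn down:)

#include <stdlib.h>

extern int kmalloc_ok;                  /* cleared once shutdown/panic begins */
extern void *um_kmalloc(int size);      /* user-callable wrapper around kmalloc() */

/* hypothetical allocation helper used by user-side code such as the
 * umid/readdir cleanup mentioned above */
static void *uml_user_alloc(int size)
{
        if (kmalloc_ok)
                return um_kmalloc(size);        /* kernel allocator, while usable */
        return malloc(size);                    /* plain libc during late shutdown */
}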
Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/main.c | 1 - arch/um/kernel/reboot.c | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c index e59f5815267..1e1a87f1c51 100644 --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -69,7 +69,6 @@ static __init void do_uml_initcalls(void) static void last_ditch_exit(int sig) { - kmalloc_ok = 0; signal(SIGINT, SIG_DFL); signal(SIGTERM, SIG_DFL); signal(SIGHUP, SIG_DFL); diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 207f89d7490..fcec51da1d3 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -38,14 +38,14 @@ static void kill_off_processes(void) void uml_cleanup(void) { - kill_off_processes(); + kmalloc_ok = 0; do_uml_exitcalls(); + kill_off_processes(); } void machine_restart(char * __unused) { - do_uml_exitcalls(); - kill_off_processes(); + uml_cleanup(); CHOOSE_MODE(reboot_tt(), reboot_skas()); } @@ -53,8 +53,7 @@ EXPORT_SYMBOL(machine_restart); void machine_power_off(void) { - do_uml_exitcalls(); - kill_off_processes(); + uml_cleanup(); CHOOSE_MODE(halt_tt(), halt_skas()); } -- cgit v1.2.3 From fc47a0d18a1994b4a18d2235fcde1b75dfa72552 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:24 -0700 Subject: [PATCH] uml: time initialization tidying user_time_init_skas and user_time_init_tt were essentially the same. So, this merges them, deleting the mode-specific functions and declarations. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/time_user.h | 2 +- arch/um/kernel/skas/Makefile | 4 ++-- arch/um/kernel/skas/include/mode-skas.h | 1 - arch/um/kernel/skas/time.c | 30 ------------------------------ arch/um/kernel/time.c | 31 ++++++++++++++----------------- arch/um/kernel/time_kern.c | 2 +- arch/um/kernel/tt/Makefile | 4 ++-- arch/um/kernel/tt/include/mode-tt.h | 1 - arch/um/kernel/tt/time.c | 28 ---------------------------- 9 files changed, 20 insertions(+), 83 deletions(-) delete mode 100644 arch/um/kernel/skas/time.c delete mode 100644 arch/um/kernel/tt/time.c diff --git a/arch/um/include/time_user.h b/arch/um/include/time_user.h index 6793a2fcd0a..f64ef77019a 100644 --- a/arch/um/include/time_user.h +++ b/arch/um/include/time_user.h @@ -8,11 +8,11 @@ extern void timer(void); extern void switch_timers(int to_real); -extern void set_interval(int timer_type); extern void idle_sleep(int secs); extern void enable_timer(void); extern void disable_timer(void); extern unsigned long time_lock(void); extern void time_unlock(unsigned long); +extern void user_time_init(void); #endif diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index d37d1bfcd6f..ff69c4b312c 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -4,10 +4,10 @@ # obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ - syscall_kern.o syscall_user.o time.o tlb.o trap_user.o uaccess.o \ + syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ subdir- := util -USER_OBJS := process.o time.o +USER_OBJS := process.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/include/mode-skas.h b/arch/um/kernel/skas/include/mode-skas.h index c1e33bd788d..bcd26a6a388 100644 --- a/arch/um/kernel/skas/include/mode-skas.h +++ b/arch/um/kernel/skas/include/mode-skas.h @@ -13,7 +13,6 @@ extern unsigned long exec_fp_regs[]; extern 
unsigned long exec_fpx_regs[]; extern int have_fpx_regs; -extern void user_time_init_skas(void); extern void sig_handler_common_skas(int sig, void *sc_ptr); extern void halt_skas(void); extern void reboot_skas(void); diff --git a/arch/um/kernel/skas/time.c b/arch/um/kernel/skas/time.c deleted file mode 100644 index 98091494b89..00000000000 --- a/arch/um/kernel/skas/time.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include "time_user.h" -#include "process.h" -#include "user.h" - -void user_time_init_skas(void) -{ - if(signal(SIGALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGALRM handler"); - if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGVTALRM handler"); - set_interval(ITIMER_VIRTUAL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index c40c86a3f91..f829b309b63 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -33,7 +33,7 @@ void timer(void) timeradd(&xtime, &local_offset, &xtime); } -void set_interval(int timer_type) +static void set_interval(int timer_type) { int usec = 1000000/hz(); struct itimerval interval = ((struct itimerval) { { 0, usec }, @@ -45,12 +45,7 @@ void set_interval(int timer_type) void enable_timer(void) { - int usec = 1000000/hz(); - struct itimerval enable = ((struct itimerval) { { 0, usec }, - { 0, usec }}); - if(setitimer(ITIMER_VIRTUAL, &enable, NULL)) - printk("enable_timer - setitimer failed, errno = %d\n", - errno); + set_interval(ITIMER_VIRTUAL); } void disable_timer(void) @@ -155,13 +150,15 @@ void idle_sleep(int secs) nanosleep(&ts, NULL); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +/* XXX This partly duplicates init_irq_signals */ + +void user_time_init(void) +{ + set_handler(SIGVTALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, + SIGALRM, SIGUSR2, -1); + set_handler(SIGALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, + SIGVTALRM, SIGUSR2, -1); + set_interval(ITIMER_VIRTUAL); +} diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index 6516fc52afe..a8b4ef601f5 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -162,7 +162,7 @@ int __init timer_init(void) { int err; - CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); + user_time_init(); err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); if(err != 0) printk(KERN_ERR "timer_init : request_irq failed - " diff --git a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile index 3fd2554e60b..6939e5af847 100644 --- a/arch/um/kernel/tt/Makefile +++ b/arch/um/kernel/tt/Makefile @@ -4,11 +4,11 @@ # obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ - syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ + syscall_kern.o syscall_user.o tlb.o tracer.o trap_user.o \ uaccess.o uaccess_user.o obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/ -USER_OBJS := gdb.o time.o tracer.o +USER_OBJS := gdb.o tracer.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/tt/include/mode-tt.h b/arch/um/kernel/tt/include/mode-tt.h index efe46201906..e171e15fead 100644 --- a/arch/um/kernel/tt/include/mode-tt.h +++ b/arch/um/kernel/tt/include/mode-tt.h @@ -13,7 +13,6 @@ enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; extern int tracing_pid; extern int tracer(int (*init_proc)(void *), void *sp); -extern void user_time_init_tt(void); extern void sig_handler_common_tt(int sig, void *sc); extern void syscall_handler_tt(int sig, union uml_pt_regs *regs); extern void reboot_tt(void); diff --git a/arch/um/kernel/tt/time.c b/arch/um/kernel/tt/time.c deleted file mode 100644 index 8565b71b07c..00000000000 --- a/arch/um/kernel/tt/time.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include "process.h" -#include "user.h" - -void user_time_init_tt(void) -{ - if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGVTALRM handler"); - set_interval(ITIMER_VIRTUAL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From 29d56cfe3ca599ddc3ae9156e7e469c044d97b96 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Sat, 25 Jun 2005 14:55:25 -0700 Subject: [PATCH] uml: hot-unplug code cleanup Clean up the hot-unplugging code. There is now an id procedure which is called to figure out what device we're talking to. The error messages from that are now done from mconsole_remove instead of the driver. remove is now called with the device number, after it has been checked, so doesn't need to do sanity checking on it. 
Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/line.c | 19 +++++++++++++++++-- arch/um/drivers/mconsole_kern.c | 36 ++++++++++++++++++++++++++++++++---- arch/um/drivers/net_kern.c | 31 +++++++++++++++++++++---------- arch/um/drivers/ssl.c | 9 +++++---- arch/um/drivers/stdio_console.c | 7 ++++--- arch/um/drivers/ubd_kern.c | 37 ++++++++++++++++++++++++------------- arch/um/include/line.h | 3 ++- arch/um/include/mconsole_kern.h | 3 ++- arch/um/kernel/tt/gdb.c | 4 ++-- arch/um/kernel/tt/gdb_kern.c | 2 +- arch/um/kernel/tt/include/debug.h | 17 +++-------------- 11 files changed, 113 insertions(+), 55 deletions(-) diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 0f59736db32..2bb4c4f5dec 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -602,11 +602,26 @@ int line_get_config(char *name, struct line *lines, unsigned int num, char *str, return n; } -int line_remove(struct line *lines, unsigned int num, char *str) +int line_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if((*end != '\0') || (end == *str)) + return -1; + + *str = end; + *start_out = n; + *end_out = n; + return n; +} + +int line_remove(struct line *lines, unsigned int num, int n) { char config[sizeof("conxxxx=none\0")]; - sprintf(config, "%s=none", str); + sprintf(config, "%d=none", n); return !line_setup(lines, num, config, 0); } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index d7c7adcc0a6..404de41a4f6 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -419,8 +419,9 @@ void mconsole_config(struct mc_request *req) void mconsole_remove(struct mc_request *req) { struct mc_device *dev; - char *ptr = req->request.data; - int err; + char *ptr = req->request.data, *err_msg = ""; + char error[256]; + int err, start, end, n; ptr += strlen("remove"); while(isspace(*ptr)) ptr++; @@ -429,8 +430,35 @@ void mconsole_remove(struct mc_request *req) mconsole_reply(req, "Bad remove option", 1, 0); return; } - err = (*dev->remove)(&ptr[strlen(dev->name)]); - mconsole_reply(req, "", err, 0); + + ptr = &ptr[strlen(dev->name)]; + + err = 1; + n = (*dev->id)(&ptr, &start, &end); + if(n < 0){ + err_msg = "Couldn't parse device number"; + goto out; + } + else if((n < start) || (n > end)){ + sprintf(error, "Invalid device number - must be between " + "%d and %d", start, end); + err_msg = error; + goto out; + } + + err = (*dev->remove)(n); + switch(err){ + case -ENODEV: + err_msg = "Device doesn't exist"; + break; + case -EBUSY: + err_msg = "Device is currently open"; + break; + default: + break; + } + out: + mconsole_reply(req, err_msg, err, 0); } #ifdef CONFIG_MAGIC_SYSRQ diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 5388a742869..1495007bf6c 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -612,25 +612,35 @@ static int net_config(char *str) return(err); } -static int net_remove(char *str) +static int net_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if((*end != '\0') || (end == *str)) + return -1; + + *start_out = n; + *end_out = n; + *str = end; + return n; +} + +static int net_remove(int n) { struct uml_net *device; struct net_device *dev; struct uml_net_private *lp; - char *end; - int n; - - n = simple_strtoul(str, &end, 0); - if((*end != '\0') || (end == str)) - return(-1); device = 
find_device(n); if(device == NULL) - return(0); + return -ENODEV; dev = device->dev; lp = dev->priv; - if(lp->fd > 0) return(-1); + if(lp->fd > 0) + return -EBUSY; if(lp->remove != NULL) (*lp->remove)(&lp->user); unregister_netdev(dev); platform_device_unregister(&device->pdev); @@ -638,13 +648,14 @@ static int net_remove(char *str) list_del(&device->list); kfree(device); free_netdev(dev); - return(0); + return 0; } static struct mc_device net_mc = { .name = "eth", .config = net_config, .get_config = NULL, + .id = net_id, .remove = net_remove, }; diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index b32a77010fb..62e04ecfada 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -49,7 +49,7 @@ static struct chan_opts opts = { static int ssl_config(char *str); static int ssl_get_config(char *dev, char *str, int size, char **error_out); -static int ssl_remove(char *str); +static int ssl_remove(int n); static struct line_driver driver = { .name = "UML serial line", @@ -69,6 +69,7 @@ static struct line_driver driver = { .name = "ssl", .config = ssl_config, .get_config = ssl_get_config, + .id = line_id, .remove = ssl_remove, }, }; @@ -94,10 +95,10 @@ static int ssl_get_config(char *dev, char *str, int size, char **error_out) str, size, error_out)); } -static int ssl_remove(char *str) +static int ssl_remove(int n) { - return(line_remove(serial_lines, - sizeof(serial_lines)/sizeof(serial_lines[0]), str)); + return line_remove(serial_lines, + sizeof(serial_lines)/sizeof(serial_lines[0]), n); } int ssl_open(struct tty_struct *tty, struct file *filp) diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index afbe1e71ed8..005aa6333b6 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -55,7 +55,7 @@ static struct chan_opts opts = { static int con_config(char *str); static int con_get_config(char *dev, char *str, int size, char **error_out); -static int con_remove(char *str); +static int con_remove(int n); static struct line_driver driver = { .name = "UML console", @@ -75,6 +75,7 @@ static struct line_driver driver = { .name = "con", .config = con_config, .get_config = con_get_config, + .id = line_id, .remove = con_remove, }, }; @@ -99,9 +100,9 @@ static int con_get_config(char *dev, char *str, int size, char **error_out) size, error_out)); } -static int con_remove(char *str) +static int con_remove(int n) { - return(line_remove(vts, sizeof(vts)/sizeof(vts[0]), str)); + return line_remove(vts, sizeof(vts)/sizeof(vts[0]), n); } static int con_open(struct tty_struct *tty, struct file *filp) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 2a7f6892c55..344b24d09a7 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -754,24 +754,34 @@ static int ubd_get_config(char *name, char *str, int size, char **error_out) return(len); } -static int ubd_remove(char *str) +static int ubd_id(char **str, int *start_out, int *end_out) +{ + int n; + + n = parse_unit(str); + *start_out = 0; + *end_out = MAX_DEV - 1; + return n; +} + +static int ubd_remove(int n) { struct ubd *dev; - int n, err = -ENODEV; + int err = -ENODEV; - n = parse_unit(&str); + spin_lock(&ubd_lock); - if((n < 0) || (n >= MAX_DEV)) - return(err); + if(ubd_gendisk[n] == NULL) + goto out; dev = &ubd_dev[n]; - if(dev->count > 0) - return(-EBUSY); /* you cannot remove a open disk */ - err = 0; - spin_lock(&ubd_lock); + if(dev->file == NULL) + goto out; - if(ubd_gendisk[n] == NULL) + /* you cannot remove a open disk */ + err = 
-EBUSY; + if(dev->count > 0) goto out; del_gendisk(ubd_gendisk[n]); @@ -787,15 +797,16 @@ static int ubd_remove(char *str) platform_device_unregister(&dev->pdev); *dev = ((struct ubd) DEFAULT_UBD); err = 0; - out: - spin_unlock(&ubd_lock); - return(err); +out: + spin_unlock(&ubd_lock); + return err; } static struct mc_device ubd_mc = { .name = "ubd", .config = ubd_config, .get_config = ubd_get_config, + .id = ubd_id, .remove = ubd_remove, }; diff --git a/arch/um/include/line.h b/arch/um/include/line.h index 4c5e92c04cc..5323d22a6ca 100644 --- a/arch/um/include/line.h +++ b/arch/um/include/line.h @@ -101,7 +101,8 @@ extern void lines_init(struct line *lines, int nlines); extern void close_lines(struct line *lines, int nlines); extern int line_config(struct line *lines, unsigned int sizeof_lines, char *str); -extern int line_remove(struct line *lines, unsigned int sizeof_lines, char *str); +extern int line_id(char **str, int *start_out, int *end_out); +extern int line_remove(struct line *lines, unsigned int sizeof_lines, int n); extern int line_get_config(char *dev, struct line *lines, unsigned int sizeof_lines, char *str, int size, char **error_out); diff --git a/arch/um/include/mconsole_kern.h b/arch/um/include/mconsole_kern.h index 61c274fcee5..d86ee14260c 100644 --- a/arch/um/include/mconsole_kern.h +++ b/arch/um/include/mconsole_kern.h @@ -20,7 +20,8 @@ struct mc_device { char *name; int (*config)(char *); int (*get_config)(char *, char *, int, char **); - int (*remove)(char *); + int (*id)(char **, int *, int *); + int (*remove)(int); }; #define CONFIG_CHUNK(str, size, current, chunk, end) \ diff --git a/arch/um/kernel/tt/gdb.c b/arch/um/kernel/tt/gdb.c index 19a0ad7b35b..37e22d71a0d 100644 --- a/arch/um/kernel/tt/gdb.c +++ b/arch/um/kernel/tt/gdb.c @@ -153,10 +153,10 @@ void remove_gdb_cb(void *unused) exit_debugger_cb(NULL); } -int gdb_remove(char *unused) +int gdb_remove(int unused) { initial_thread_cb(remove_gdb_cb, NULL); - return(0); + return 0; } void signal_usr1(int sig) diff --git a/arch/um/kernel/tt/gdb_kern.c b/arch/um/kernel/tt/gdb_kern.c index 93fb121f86a..26506388a6a 100644 --- a/arch/um/kernel/tt/gdb_kern.c +++ b/arch/um/kernel/tt/gdb_kern.c @@ -10,7 +10,7 @@ #ifdef CONFIG_MCONSOLE extern int gdb_config(char *str); -extern int gdb_remove(char *unused); +extern int gdb_remove(int n); static struct mc_device gdb_mc = { .name = "gdb", diff --git a/arch/um/kernel/tt/include/debug.h b/arch/um/kernel/tt/include/debug.h index 8eff674107c..738435461e1 100644 --- a/arch/um/kernel/tt/include/debug.h +++ b/arch/um/kernel/tt/include/debug.h @@ -4,8 +4,8 @@ * Licensed under the GPL */ -#ifndef __DEBUG_H -#define __DEBUG_H +#ifndef __UML_TT_DEBUG_H +#define __UML_TT_DEBUG_H extern int debugger_proxy(int status, pid_t pid); extern void child_proxy(pid_t pid, int status); @@ -13,17 +13,6 @@ extern void init_proxy (pid_t pid, int waiting, int status); extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd); extern void fake_child_exit(void); extern int gdb_config(char *str); -extern int gdb_remove(char *unused); +extern int gdb_remove(int unused); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From 84dd8d7e9c080b4db66b00b8bc36ccf09a90f824 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 25 Jun 2005 14:55:26 -0700 Subject: [PATCH] uml: add profile_pc for i386 Cope with a conditional i386 definition, which is wrong for UML. Before we just used that one, but it wasn't defined for CONFIG_SMP, so in that case we got link errors. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/ptrace-i386.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/asm-um/ptrace-i386.h b/include/asm-um/ptrace-i386.h index 04222f35c43..fe882b9d917 100644 --- a/include/asm-um/ptrace-i386.h +++ b/include/asm-um/ptrace-i386.h @@ -32,6 +32,10 @@ #define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) #define PT_FIX_EXEC_STACK(sp) do ; while(0) +/* Cope with a conditional i386 definition. */ +#undef profile_pc +#define profile_pc(regs) PT_REGS_IP(regs) + #define user_mode(r) UPT_IS_USER(&(r)->regs) #endif -- cgit v1.2.3 From b0744bd2925a4a24865963322534107d2ad553f9 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Sat, 25 Jun 2005 14:55:27 -0700 Subject: [PATCH] s/390: Use klist in cio Convert the common I/O layer to use the klist interfaces. This patch has been adapted from the previous version to the changed interface semantics. Also, gcc 4.0 compile warnings have been removed. Signed-off-by: Cornelia Huck Cc: Greg KH Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/cio/ccwgroup.c | 30 +++++----------- drivers/s390/cio/css.c | 34 ++++++++---------- drivers/s390/cio/device.c | 84 +++++++++++++++++++++++---------------------- 3 files changed, 66 insertions(+), 82 deletions(-) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 306525acb9f..91ea8e4777f 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -403,34 +403,22 @@ ccwgroup_driver_register (struct ccwgroup_driver *cdriver) return driver_register(&cdriver->driver); } -static inline struct device * -__get_next_ccwgroup_device(struct device_driver *drv) +static int +__ccwgroup_driver_unregister_device(struct device *dev, void *data) { - struct device *dev, *d; - - down_read(&drv->bus->subsys.rwsem); - dev = NULL; - list_for_each_entry(d, &drv->devices, driver_list) { - dev = get_device(d); - if (dev) - break; - } - up_read(&drv->bus->subsys.rwsem); - return dev; + __ccwgroup_remove_symlinks(to_ccwgroupdev(dev)); + device_unregister(dev); + put_device(dev); + return 0; } void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver) { - struct device *dev; - /* We don't want ccwgroup devices to live longer than their driver. 
*/ get_driver(&cdriver->driver); - while ((dev = __get_next_ccwgroup_device(&cdriver->driver))) { - __ccwgroup_remove_symlinks(to_ccwgroupdev(dev)); - device_unregister(dev); - put_device(dev); - }; + driver_for_each_device(&cdriver->driver, NULL, NULL, + __ccwgroup_driver_unregister_device); put_driver(&cdriver->driver); driver_unregister(&cdriver->driver); } @@ -449,7 +437,7 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev) if (cdev->dev.driver_data) { gdev = (struct ccwgroup_device *)cdev->dev.driver_data; if (get_device(&gdev->dev)) { - if (!list_empty(&gdev->dev.node)) + if (klist_node_attached(&gdev->dev.knode_bus)) return gdev; put_device(&gdev->dev); } diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 87bd70eeabe..555119cacc2 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -128,34 +128,28 @@ css_probe_device(int irq) return ret; } +static int +check_subchannel(struct device * dev, void * data) +{ + struct subchannel *sch; + int irq = (unsigned long)data; + + sch = to_subchannel(dev); + return (sch->irq == irq); +} + struct subchannel * get_subchannel_by_schid(int irq) { - struct subchannel *sch; - struct list_head *entry; struct device *dev; - if (!get_bus(&css_bus_type)) - return NULL; - down_read(&css_bus_type.subsys.rwsem); - sch = NULL; - list_for_each(entry, &css_bus_type.devices.list) { - dev = get_device(container_of(entry, - struct device, bus_list)); - if (!dev) - continue; - sch = to_subchannel(dev); - if (sch->irq == irq) - break; - put_device(dev); - sch = NULL; - } - up_read(&css_bus_type.subsys.rwsem); - put_bus(&css_bus_type); + dev = bus_find_device(&css_bus_type, NULL, + (void *)(unsigned long)irq, check_subchannel); - return sch; + return dev ? to_subchannel(dev) : NULL; } + static inline int css_get_subchannel_status(struct subchannel *sch, int schid) { diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 809e1108a06..14c76f5e417 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -514,36 +514,39 @@ ccw_device_register(struct ccw_device *cdev) return ret; } +struct match_data { + unsigned int devno; + struct ccw_device * sibling; +}; + +static int +match_devno(struct device * dev, void * data) +{ + struct match_data * d = (struct match_data *)data; + struct ccw_device * cdev; + + cdev = to_ccwdev(dev); + if ((cdev->private->state == DEV_STATE_DISCONNECTED) && + (cdev->private->devno == d->devno) && + (cdev != d->sibling)) { + cdev->private->state = DEV_STATE_NOT_OPER; + return 1; + } + return 0; +} + static struct ccw_device * get_disc_ccwdev_by_devno(unsigned int devno, struct ccw_device *sibling) { - struct ccw_device *cdev; - struct list_head *entry; struct device *dev; + struct match_data data = { + .devno = devno, + .sibling = sibling, + }; - if (!get_bus(&ccw_bus_type)) - return NULL; - down_read(&ccw_bus_type.subsys.rwsem); - cdev = NULL; - list_for_each(entry, &ccw_bus_type.devices.list) { - dev = get_device(container_of(entry, - struct device, bus_list)); - if (!dev) - continue; - cdev = to_ccwdev(dev); - if ((cdev->private->state == DEV_STATE_DISCONNECTED) && - (cdev->private->devno == devno) && - (cdev != sibling)) { - cdev->private->state = DEV_STATE_NOT_OPER; - break; - } - put_device(dev); - cdev = NULL; - } - up_read(&ccw_bus_type.subsys.rwsem); - put_bus(&ccw_bus_type); + dev = bus_find_device(&css_bus_type, NULL, &data, match_devno); - return cdev; + return dev ? 
to_ccwdev(dev) : NULL; } static void @@ -647,7 +650,7 @@ io_subchannel_register(void *data) cdev = (struct ccw_device *) data; sch = to_subchannel(cdev->dev.parent); - if (!list_empty(&sch->dev.children)) { + if (klist_node_attached(&cdev->dev.knode_parent)) { bus_rescan_devices(&ccw_bus_type); goto out; } @@ -1019,30 +1022,29 @@ ccw_device_probe_console(void) /* * get ccw_device matching the busid, but only if owned by cdrv */ +static int +__ccwdev_check_busid(struct device *dev, void *id) +{ + char *bus_id; + + bus_id = (char *)id; + + return (strncmp(bus_id, dev->bus_id, BUS_ID_SIZE) == 0); +} + + struct ccw_device * get_ccwdev_by_busid(struct ccw_driver *cdrv, const char *bus_id) { - struct device *d, *dev; + struct device *dev; struct device_driver *drv; drv = get_driver(&cdrv->driver); if (!drv) - return 0; - - down_read(&drv->bus->subsys.rwsem); - - dev = NULL; - list_for_each_entry(d, &drv->devices, driver_list) { - dev = get_device(d); + return NULL; - if (dev && !strncmp(bus_id, dev->bus_id, BUS_ID_SIZE)) - break; - else if (dev) { - put_device(dev); - dev = NULL; - } - } - up_read(&drv->bus->subsys.rwsem); + dev = driver_find_device(drv, NULL, (void *)bus_id, + __ccwdev_check_busid); put_driver(drv); return dev ? to_ccwdev(dev) : 0; -- cgit v1.2.3 From c551288e34cff0a78b3103ce2e12099dffa41071 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Sat, 25 Jun 2005 14:55:28 -0700 Subject: [PATCH] s/390: use klist in dasd driver Convert the dasd driver to use the new klist interface. Signed-off-by: Cornelia Huck Cc: Greg KH Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/block/dasd.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ceeb3cf64a1..3e39508bd92 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1952,26 +1952,24 @@ dasd_generic_notify(struct ccw_device *cdev, int event) * Automatically online either all dasd devices (dasd_autodetect) or * all devices specified with dasd= parameters. */ +static int +__dasd_auto_online(struct device *dev, void *data) +{ + struct ccw_device *cdev; + + cdev = to_ccwdev(dev); + if (dasd_autodetect || dasd_busid_known(cdev->dev.bus_id) == 0) + ccw_device_set_online(cdev); + return 0; +} + void dasd_generic_auto_online (struct ccw_driver *dasd_discipline_driver) { struct device_driver *drv; - struct device *d, *dev; - struct ccw_device *cdev; drv = get_driver(&dasd_discipline_driver->driver); - down_read(&drv->bus->subsys.rwsem); - dev = NULL; - list_for_each_entry(d, &drv->devices, driver_list) { - dev = get_device(d); - if (!dev) - continue; - cdev = to_ccwdev(dev); - if (dasd_autodetect || dasd_busid_known(cdev->dev.bus_id) == 0) - ccw_device_set_online(cdev); - put_device(dev); - } - up_read(&drv->bus->subsys.rwsem); + driver_for_each_device(drv, NULL, NULL, __dasd_auto_online); put_driver(drv); } -- cgit v1.2.3 From f901e5d1e06b3326c100c5d0df43656311befb81 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Sat, 25 Jun 2005 14:55:29 -0700 Subject: [PATCH] s/390: compile fix for dcssblk Fix compile breakage in the dcss block driver introduced by the attribute changes. 
Signed-off-by: Cornelia Huck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/block/dcssblk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6bc27d52326..4fde4118899 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -718,7 +718,7 @@ dcssblk_check_params(void) buf[j-i] = dcssblk_segments[j]; } buf[j-i] = '\0'; - rc = dcssblk_add_store(dcssblk_root_dev, buf, j-i); + rc = dcssblk_add_store(dcssblk_root_dev, NULL, buf, j-i); if ((rc >= 0) && (dcssblk_segments[j] == '(')) { for (k = 0; buf[k] != '\0'; k++) buf[k] = toupper(buf[k]); @@ -728,7 +728,7 @@ dcssblk_check_params(void) up_read(&dcssblk_devices_sem); if (dev_info) dcssblk_shared_store(&dev_info->dev, - "0\n", 2); + NULL, "0\n", 2); } } while ((dcssblk_segments[j] != ',') && -- cgit v1.2.3 From 77fa22450de00d535de2cc8be653983560828000 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 25 Jun 2005 14:55:30 -0700 Subject: [PATCH] s390: improved machine check handling Improved machine check handling. Kernel is now able to receive machine checks while in kernel mode (system call, interrupt and program check handling). Also register validation is now performed. Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/entry.S | 102 +++++++++++-- arch/s390/kernel/entry64.S | 97 +++++++++++-- arch/s390/kernel/process.c | 46 ++---- arch/s390/kernel/setup.c | 13 +- arch/s390/kernel/smp.c | 13 +- drivers/s390/s390mach.c | 321 ++++++++++++++++++++++++++++++++++++----- drivers/s390/s390mach.h | 35 ++++- include/asm-s390/lowcore.h | 7 +- include/asm-s390/processor.h | 52 +++---- include/asm-s390/ptrace.h | 2 +- include/asm-s390/system.h | 21 ++- include/asm-s390/thread_info.h | 2 + 12 files changed, 576 insertions(+), 135 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c0e09b33feb..5b262b5d869 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -7,6 +7,7 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Heiko Carstens */ #include @@ -49,9 +50,9 @@ SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING | \ _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED) +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -121,7 +122,11 @@ STACK_SIZE = 1 << STACK_SHIFT bz BASED(stack_overflow) 3: #endif -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw +2: + .endm + + .macro CREATE_STACK_FRAME psworg,savearea + s %r15,BASED(.Lc_spsize) # make room for registers & psw mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack la %r12,\psworg st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 @@ -161,6 +166,13 @@ __switch_to_base: be __switch_to_noper-__switch_to_base(%r1) # we got away w/o bashing TLB's lctl %c9,%c11,__THREAD_per(%r3) # Nope we didn't __switch_to_noper: + l %r4,__THREAD_info(%r2) # get thread_info of prev + tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? 
+ bz __switch_to_no_mcck-__switch_to_base(%r1) + ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + l %r4,__THREAD_info(%r3) # get thread_info of next + oi __TI_flags+3(%r4),_TIF_MCCK_PENDING # set it in next +__switch_to_no_mcck: stm %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task st %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp l %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp @@ -185,6 +197,7 @@ system_call: sysc_saveall: SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA lh %r7,0x8a # get svc number from lowcore #ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: @@ -234,6 +247,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. # sysc_work: + tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(sysc_reschedule) tm __TI_flags+3(%r9),_TIF_SIGPENDING @@ -252,6 +267,14 @@ sysc_reschedule: la %r14,BASED(sysc_work_loop) br %r1 # call scheduler +# +# _TIF_MCCK_PENDING is set, call handler +# +sysc_mcck_pending: + l %r1,BASED(.Ls390_handle_mcck) + la %r14,BASED(sysc_work_loop) + br %r1 # TIF bit will be cleared by handler + # # _TIF_SIGPENDING is set, call do_signal # @@ -430,6 +453,7 @@ pgm_check_handler: tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime) @@ -468,6 +492,7 @@ pgm_per: # pgm_per_std: SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime2) @@ -493,6 +518,7 @@ pgm_no_vtime2: # pgm_svcper: SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime3) @@ -521,6 +547,7 @@ io_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL __LC_IO_OLD_PSW,__LC_SAVE_AREA+16,0 + CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(io_no_vtime) @@ -578,15 +605,25 @@ io_work: lr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING and _TIF_NEED_RESCHED +# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED and _TIF_MCCK_PENDING # io_work_loop: + tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + bo BASED(io_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(io_reschedule) tm __TI_flags+3(%r9),_TIF_SIGPENDING bo BASED(io_sigpending) b BASED(io_leave) +# +# _TIF_MCCK_PENDING is set, call handler +# +io_mcck_pending: + l %r1,BASED(.Ls390_handle_mcck) + l %r14,BASED(io_work_loop) + br %r1 # TIF bit will be cleared by handler + # # _TIF_NEED_RESCHED is set, call schedule # @@ -621,6 +658,7 @@ ext_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16,0 + CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? 
bz BASED(ext_no_vtime) @@ -642,19 +680,62 @@ ext_no_vtime: .globl mcck_int_handler mcck_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER + spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer + lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32,0 + la %r12,__LC_MCK_OLD_PSW + tm __LC_MCCK_CODE,0x80 # system damage? + bo BASED(mcck_int_main) # yes -> rest of mcck code invalid + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + bo BASED(0f) + spt __LC_LAST_UPDATE_TIMER # revalidate cpu timer #ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? + mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_EXIT_TIMER +0: tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? + bno BASED(mcck_no_vtime) # no -> skip cleanup critical + tm __LC_MCK_OLD_PSW+1,0x01 # interrupting from user ? bz BASED(mcck_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER mcck_no_vtime: #endif +0: + tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + bno BASED(mcck_int_main) # no -> skip cleanup critical + tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit + bnz BASED(mcck_int_main) # from user -> load async stack + clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end) + bhe BASED(mcck_int_main) + clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start) + bl BASED(mcck_int_main) + l %r14,BASED(.Lcleanup_critical) + basr %r14,%r14 +mcck_int_main: + l %r14,__LC_PANIC_STACK # are we already on the panic stack? + slr %r14,%r15 + sra %r14,PAGE_SHIFT + be BASED(0f) + l %r15,__LC_PANIC_STACK # load panic stack +0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32 + l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ls390_mcck) - basr %r14,%r1 # call machine check handler + basr %r14,%r1 # call machine check handler + tm SP_PSW+1(%r15),0x01 # returning to user ? 
+ bno BASED(mcck_return) + l %r1,__LC_KERNEL_STACK # switch to kernel stack + s %r1,BASED(.Lc_spsize) + mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lr %r15,%r1 + stosm __SF_EMPTY(%r15),0x04 # turn dat on + tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + bno BASED(mcck_return) + l %r1,BASED(.Ls390_handle_mcck) + basr %r14,%r1 # call machine check handler mcck_return: RESTORE_ALL 0 @@ -742,7 +823,7 @@ cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop) bl BASED(0f) clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop+4) - bl BASED(cleanup_sysc_leave) + bl BASED(cleanup_sysc_return) 0: br %r14 @@ -760,6 +841,7 @@ cleanup_system_call: mvc __LC_SAVE_AREA(16),__LC_SAVE_AREA+16 0: st %r13,__LC_SAVE_AREA+20 SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA st %r15,__LC_SAVE_AREA+28 lh %r7,0x8a #ifdef CONFIG_VIRT_CPU_ACCOUNTING @@ -834,6 +916,8 @@ cleanup_sysc_leave_insn: * Symbol constants */ .Ls390_mcck: .long s390_do_machine_check +.Ls390_handle_mcck: + .long s390_handle_mcck .Ldo_IRQ: .long do_IRQ .Ldo_extint: .long do_extint .Ldo_signal: .long do_signal diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 51527ab8c8f..57ca75d0ad7 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -7,6 +7,7 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Heiko Carstens */ #include @@ -52,9 +53,9 @@ SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING | \ _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED) +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -114,7 +115,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED) jz stack_overflow 3: #endif -2: aghi %r15,-SP_SIZE # make room for registers & psw +2: + .endm + + .macro CREATE_STACK_FRAME psworg,savearea + aghi %r15,-SP_SIZE # make room for registers & psw mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack la %r12,\psworg stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 @@ -152,6 +157,13 @@ __switch_to: je __switch_to_noper # we got away without bashing TLB's lctlg %c9,%c11,__THREAD_per(%r3) # Nope we didn't __switch_to_noper: + lg %r4,__THREAD_info(%r2) # get thread_info of prev + tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? + jz __switch_to_no_mcck + ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + lg %r4,__THREAD_info(%r3) # get thread_info of next + oi __TI_flags+7(%r4),_TIF_MCCK_PENDING # set it in next +__switch_to_no_mcck: stmg %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task stg %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp lg %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp @@ -176,6 +188,7 @@ system_call: sysc_saveall: SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore #ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: @@ -232,6 +245,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. 
# sysc_work: + tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + jo sysc_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo sysc_reschedule tm __TI_flags+7(%r9),_TIF_SIGPENDING @@ -249,6 +264,13 @@ sysc_reschedule: larl %r14,sysc_work_loop jg schedule # return point is sysc_return +# +# _TIF_MCCK_PENDING is set, call handler +# +sysc_mcck_pending: + larl %r14,sysc_work_loop + jg s390_handle_mcck # TIF bit will be cleared by handler + # # _TIF_SIGPENDING is set, call do_signal # @@ -474,6 +496,7 @@ pgm_check_handler: tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception jnz pgm_per # got per exception -> special case SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime @@ -512,6 +535,7 @@ pgm_per: # pgm_per_std: SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime2 @@ -537,6 +561,7 @@ pgm_no_vtime2: # pgm_svcper: SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime3 @@ -564,6 +589,7 @@ io_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL __LC_IO_OLD_PSW,__LC_SAVE_AREA+32,0 + CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz io_no_vtime @@ -621,15 +647,24 @@ io_work: lgr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING and _TIF_NEED_RESCHED +# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED and _TIF_MCCK_PENDING # io_work_loop: + tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + jo io_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo io_reschedule tm __TI_flags+7(%r9),_TIF_SIGPENDING jo io_sigpending j io_leave +# +# _TIF_MCCK_PENDING is set, call handler +# +io_mcck_pending: + larl %r14,io_work_loop + jg s390_handle_mcck # TIF bit will be cleared by handler + # # _TIF_NEED_RESCHED is set, call schedule # @@ -661,6 +696,7 @@ ext_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32,0 + CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz ext_no_vtime @@ -680,18 +716,60 @@ ext_no_vtime: */ .globl mcck_int_handler mcck_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER + la %r1,4095 # revalidate r1 + spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+64 - SAVE_ALL __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64,0 + la %r12,__LC_MCK_OLD_PSW + tm __LC_MCCK_CODE,0x80 # system damage? + jo mcck_int_main # yes -> rest of mcck code invalid + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + jo 0f + spt __LC_LAST_UPDATE_TIMER #ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? + mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_EXIT_TIMER +0: tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? + jno mcck_no_vtime # no -> no timer update + tm __LC_MCK_OLD_PSW+1,0x01 # interrupting from user ? 
jz mcck_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER mcck_no_vtime: #endif - brasl %r14,s390_do_machine_check +0: + tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_int_main # no -> skip cleanup critical + tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit + jnz mcck_int_main # from user -> load kernel stack + clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end) + jhe mcck_int_main + clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start) + jl mcck_int_main + brasl %r14,cleanup_critical +mcck_int_main: + lg %r14,__LC_PANIC_STACK # are we already on the panic stack? + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK # load panic stack +0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64 + lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + la %r2,SP_PTREGS(%r15) # load pt_regs + brasl %r14,s390_do_machine_check + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno mcck_return + lg %r1,__LC_KERNEL_STACK # switch to kernel stack + aghi %r1,-SP_SIZE + mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lgr %r15,%r1 + stosm __SF_EMPTY(%r15),0x04 # turn dat on + tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + jno mcck_return + brasl %r14,s390_handle_mcck mcck_return: RESTORE_ALL 0 @@ -775,7 +853,7 @@ cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop) jl 0f clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop+8) - jl cleanup_sysc_leave + jl cleanup_sysc_return 0: br %r14 @@ -793,6 +871,7 @@ cleanup_system_call: mvc __LC_SAVE_AREA(32),__LC_SAVE_AREA+32 0: stg %r13,__LC_SAVE_AREA+40 SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA stg %r15,__LC_SAVE_AREA+56 llgh %r7,__LC_SVC_INT_CODE #ifdef CONFIG_VIRT_CPU_ACCOUNTING diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7aea25d6e30..9f3dff6c0b7 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,13 +91,12 @@ void do_monitor_call(struct pt_regs *regs, long interruption_code) (void *)(long) smp_processor_id()); } +extern void s390_handle_mcck(void); /* * The idle loop on a S390... */ void default_idle(void) { - psw_t wait_psw; - unsigned long reg; int cpu, rc; local_irq_disable(); @@ -125,38 +124,17 @@ void default_idle(void) cpu_die(); #endif - /* - * Wait for external, I/O or machine check interrupt and - * switch off machine check bit after the wait has ended. - */ - wait_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT; -#ifndef CONFIG_ARCH_S390X - asm volatile ( - " basr %0,0\n" - "0: la %0,1f-0b(%0)\n" - " st %0,4(%1)\n" - " oi 4(%1),0x80\n" - " lpsw 0(%1)\n" - "1: la %0,2f-1b(%0)\n" - " st %0,4(%1)\n" - " oi 4(%1),0x80\n" - " ni 1(%1),0xf9\n" - " lpsw 0(%1)\n" - "2:" - : "=&a" (reg) : "a" (&wait_psw) : "memory", "cc" ); -#else /* CONFIG_ARCH_S390X */ - asm volatile ( - " larl %0,0f\n" - " stg %0,8(%1)\n" - " lpswe 0(%1)\n" - "0: larl %0,1f\n" - " stg %0,8(%1)\n" - " ni 1(%1),0xf9\n" - " lpswe 0(%1)\n" - "1:" - : "=&a" (reg) : "a" (&wait_psw) : "memory", "cc" ); -#endif /* CONFIG_ARCH_S390X */ + local_mcck_disable(); + if (test_thread_flag(TIF_MCCK_PENDING)) { + local_mcck_enable(); + local_irq_enable(); + s390_handle_mcck(); + return; + } + + /* Wait for external, I/O or machine check interrupt. 
*/ + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT | + PSW_MASK_IO | PSW_MASK_EXT); } void cpu_idle(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index df83215beac..eb7be0ad717 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -414,7 +414,8 @@ setup_lowcore(void) lc->program_new_psw.mask = PSW_KERNEL_BITS; lc->program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = PSW_KERNEL_BITS; + lc->mcck_new_psw.mask = + PSW_KERNEL_BITS & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; lc->io_new_psw.mask = PSW_KERNEL_BITS; @@ -424,12 +425,18 @@ setup_lowcore(void) lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; lc->async_stack = (unsigned long) __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; -#ifdef CONFIG_CHECK_STACK lc->panic_stack = (unsigned long) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; -#endif lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; +#ifndef CONFIG_ARCH_S390X + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = (__u32) + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); + /* enable extended save area */ + ctl_set_bit(14, 29); + } +#endif #ifdef CONFIG_ARCH_S390X if (MACHINE_HAS_DIAG44) lc->diag44_opcode = 0x83000044; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 93c71fef99d..50c335067cf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -773,13 +773,24 @@ void __init smp_prepare_cpus(unsigned int max_cpus) *(lowcore_ptr[i]) = S390_lowcore; lowcore_ptr[i]->async_stack = stack + (ASYNC_SIZE); -#ifdef CONFIG_CHECK_STACK stack = __get_free_pages(GFP_KERNEL,0); if (stack == 0ULL) panic("smp_boot_cpus failed to allocate memory\n"); lowcore_ptr[i]->panic_stack = stack + (PAGE_SIZE); +#ifndef __s390x__ + if (MACHINE_HAS_IEEE) { + lowcore_ptr[i]->extended_save_area_addr = + (__u32) __get_free_pages(GFP_KERNEL,0); + if (lowcore_ptr[i]->extended_save_area_addr == 0) + panic("smp_boot_cpus failed to " + "allocate memory\n"); + } #endif } +#ifndef __s390x__ + if (MACHINE_HAS_IEEE) + ctl_set_bit(14, 29); /* enable extended save area */ +#endif set_prefix((u32)(unsigned long) lowcore_ptr[smp_processor_id()]); for_each_cpu(cpu) diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index ffa996c8a90..5bb255e02ac 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -31,14 +31,14 @@ extern void css_reiterate_subchannels(void); extern struct workqueue_struct *slow_path_wq; extern struct work_struct slow_path_work; -static void +static NORET_TYPE void s390_handle_damage(char *msg) { - printk(KERN_EMERG "%s\n", msg); #ifdef CONFIG_SMP smp_send_stop(); #endif disabled_wait((unsigned long) __builtin_return_address(0)); + for(;;); } /* @@ -122,40 +122,39 @@ repeat: return 0; } +struct mcck_struct { + int kill_task; + int channel_report; + int warning; + unsigned long long mcck_code; +}; + +static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); + /* - * machine check handler. + * Main machine check handler function. Will be called with interrupts enabled + * or disabled and machine checks enabled or disabled. 
*/ void -s390_do_machine_check(void) +s390_handle_mcck(void) { - struct mci *mci; - - mci = (struct mci *) &S390_lowcore.mcck_interruption_code; + unsigned long flags; + struct mcck_struct mcck; - if (mci->sd) /* system damage */ - s390_handle_damage("received system damage machine check\n"); + /* + * Disable machine checks and get the current state of accumulated + * machine checks. Afterwards delete the old state and enable machine + * checks again. + */ + local_irq_save(flags); + local_mcck_disable(); + mcck = __get_cpu_var(cpu_mcck); + memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); + clear_thread_flag(TIF_MCCK_PENDING); + local_mcck_enable(); + local_irq_restore(flags); - if (mci->pd) /* instruction processing damage */ - s390_handle_damage("received instruction processing " - "damage machine check\n"); - - if (mci->se) /* storage error uncorrected */ - s390_handle_damage("received storage error uncorrected " - "machine check\n"); - - if (mci->sc) /* storage error corrected */ - printk(KERN_WARNING - "received storage error corrected machine check\n"); - - if (mci->ke) /* storage key-error uncorrected */ - s390_handle_damage("received storage key-error uncorrected " - "machine check\n"); - - if (mci->ds && mci->fa) /* storage degradation */ - s390_handle_damage("received storage degradation machine " - "check\n"); - - if (mci->cp) /* channel report word pending */ + if (mcck.channel_report) up(&m_sem); #ifdef CONFIG_MACHCHK_WARNING @@ -168,7 +167,7 @@ s390_do_machine_check(void) * On VM we only get one interrupt per virtally presented machinecheck. * Though one suffices, we may get one interrupt per (virtual) processor. */ - if (mci->w) { /* WARNING pending ? */ + if (mcck.warning) { /* WARNING pending ? */ static int mchchk_wng_posted = 0; /* * Use single machine clear, as we cannot handle smp right now @@ -178,6 +177,261 @@ s390_do_machine_check(void) kill_proc(1, SIGPWR, 1); } #endif + + if (mcck.kill_task) { + local_irq_enable(); + printk(KERN_EMERG "mcck: Terminating task because of machine " + "malfunction (code 0x%016llx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); + do_exit(SIGSEGV); + } +} + +/* + * returns 0 if all registers could be validated + * returns 1 otherwise + */ +static int +s390_revalidate_registers(struct mci *mci) +{ + int kill_task; + u64 tmpclock; + u64 zero; + void *fpt_save_area, *fpt_creg_save_area; + + kill_task = 0; + zero = 0; + /* General purpose registers */ + if (!mci->gr) + /* + * General purpose registers couldn't be restored and have + * unknown contents. Process needs to be terminated. + */ + kill_task = 1; + + /* Revalidate floating point registers */ + if (!mci->fp) + /* + * Floating point registers can't be restored and + * therefore the process needs to be terminated. + */ + kill_task = 1; + +#ifndef __s390x__ + asm volatile("ld 0,0(%0)\n" + "ld 2,8(%0)\n" + "ld 4,16(%0)\n" + "ld 6,24(%0)" + : : "a" (&S390_lowcore.floating_pt_save_area)); +#endif + + if (MACHINE_HAS_IEEE) { +#ifdef __s390x__ + fpt_save_area = &S390_lowcore.floating_pt_save_area; + fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; +#else + fpt_save_area = (void *) S390_lowcore.extended_save_area_addr; + fpt_creg_save_area = fpt_save_area+128; +#endif + /* Floating point control register */ + if (!mci->fc) { + /* + * Floating point control register can't be restored. + * Task will be terminated. 
+ */ + asm volatile ("lfpc 0(%0)" : : "a" (&zero)); + kill_task = 1; + + } + else + asm volatile ( + "lfpc 0(%0)" + : : "a" (fpt_creg_save_area)); + + asm volatile("ld 0,0(%0)\n" + "ld 1,8(%0)\n" + "ld 2,16(%0)\n" + "ld 3,24(%0)\n" + "ld 4,32(%0)\n" + "ld 5,40(%0)\n" + "ld 6,48(%0)\n" + "ld 7,56(%0)\n" + "ld 8,64(%0)\n" + "ld 9,72(%0)\n" + "ld 10,80(%0)\n" + "ld 11,88(%0)\n" + "ld 12,96(%0)\n" + "ld 13,104(%0)\n" + "ld 14,112(%0)\n" + "ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } + + /* Revalidate access registers */ + asm volatile("lam 0,15,0(%0)" + : : "a" (&S390_lowcore.access_regs_save_area)); + if (!mci->ar) + /* + * Access registers have unknown contents. + * Terminating task. + */ + kill_task = 1; + + /* Revalidate control registers */ + if (!mci->cr) + /* + * Control registers have unknown contents. + * Can't recover and therefore stopping machine. + */ + s390_handle_damage("invalid control registers."); + else +#ifdef __s390x__ + asm volatile("lctlg 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#else + asm volatile("lctl 0,15,0(%0)" + : : "a" (&S390_lowcore.cregs_save_area)); +#endif + + /* + * We don't even try to revalidate the TOD register, since we simply + * can't write something sensible into that register. + */ + +#ifdef __s390x__ + /* + * See if we can revalidate the TOD programmable register with its + * old contents (should be zero) otherwise set it to zero. + */ + if (!mci->pr) + asm volatile("sr 0,0\n" + "sckpf" + : : : "0", "cc"); + else + asm volatile( + "l 0,0(%0)\n" + "sckpf" + : : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc"); +#endif + + /* Revalidate clock comparator register */ + asm volatile ("stck 0(%1)\n" + "sckc 0(%1)" + : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory"); + + /* Check if old PSW is valid */ + if (!mci->wp) + /* + * Can't tell if we come from user or kernel mode + * -> stopping machine. + */ + s390_handle_damage("old psw invalid."); + + if (!mci->ms || !mci->pm || !mci->ia) + kill_task = 1; + + return kill_task; +} + +/* + * machine check handler. 
+ */ +void +s390_do_machine_check(struct pt_regs *regs) +{ + struct mci *mci; + struct mcck_struct *mcck; + int umode; + + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; + mcck = &__get_cpu_var(cpu_mcck); + umode = user_mode(regs); + + if (mci->sd) + /* System damage -> stopping machine */ + s390_handle_damage("received system damage machine check."); + + if (mci->pd) { + if (mci->b) { + /* Processing backup -> verify if we can survive this */ + u64 z_mcic, o_mcic, t_mcic; +#ifdef __s390x__ + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | + 1ULL<<16); +#else + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 | + 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); +#endif + t_mcic = *(u64 *)mci; + + if (((t_mcic & z_mcic) != 0) || + ((t_mcic & o_mcic) != o_mcic)) { + s390_handle_damage("processing backup machine " + "check with damage."); + } + if (!umode) + s390_handle_damage("processing backup machine " + "check in kernel mode."); + mcck->kill_task = 1; + mcck->mcck_code = *(unsigned long long *) mci; + } + else { + /* Processing damage -> stopping machine */ + s390_handle_damage("received instruction processing " + "damage machine check."); + } + } + if (s390_revalidate_registers(mci)) { + if (umode) { + /* + * Couldn't restore all register contents while in + * user mode -> mark task for termination. + */ + mcck->kill_task = 1; + mcck->mcck_code = *(unsigned long long *) mci; + set_thread_flag(TIF_MCCK_PENDING); + } + else + /* + * Couldn't restore all register contents while in + * kernel mode -> stopping machine. 
+ */ + s390_handle_damage("unable to revalidate registers."); + } + + if (mci->se) + /* Storage error uncorrected */ + s390_handle_damage("received storage error uncorrected " + "machine check."); + + if (mci->ke) + /* Storage key-error uncorrected */ + s390_handle_damage("received storage key-error uncorrected " + "machine check."); + + if (mci->ds && mci->fa) + /* Storage degradation */ + s390_handle_damage("received storage degradation machine " + "check."); + + if (mci->cp) { + /* Channel report word pending */ + mcck->channel_report = 1; + set_thread_flag(TIF_MCCK_PENDING); + } + + if (mci->w) { + /* Warning pending */ + mcck->warning = 1; + set_thread_flag(TIF_MCCK_PENDING); + } } /* @@ -189,9 +443,8 @@ static int machine_check_init(void) { init_MUTEX_LOCKED(&m_sem); - ctl_clear_bit(14, 25); /* disable damage MCH */ - ctl_set_bit(14, 26); /* enable degradation MCH */ - ctl_set_bit(14, 27); /* enable system recovery MCH */ + ctl_clear_bit(14, 25); /* disable external damage MCH */ + ctl_set_bit(14, 27); /* enable system recovery MCH */ #ifdef CONFIG_MACHCHK_WARNING ctl_set_bit(14, 24); /* enable warning MCH */ #endif diff --git a/drivers/s390/s390mach.h b/drivers/s390/s390mach.h index 7e26f0f1b0d..4eaa7017918 100644 --- a/drivers/s390/s390mach.h +++ b/drivers/s390/s390mach.h @@ -16,20 +16,45 @@ struct mci { __u32 sd : 1; /* 00 system damage */ __u32 pd : 1; /* 01 instruction-processing damage */ __u32 sr : 1; /* 02 system recovery */ - __u32 to_be_defined_1 : 4; /* 03-06 */ + __u32 to_be_defined_1 : 1; /* 03 */ + __u32 cd : 1; /* 04 timing-facility damage */ + __u32 ed : 1; /* 05 external damage */ + __u32 to_be_defined_2 : 1; /* 06 */ __u32 dg : 1; /* 07 degradation */ __u32 w : 1; /* 08 warning pending */ __u32 cp : 1; /* 09 channel-report pending */ - __u32 to_be_defined_2 : 6; /* 10-15 */ + __u32 sp : 1; /* 10 service-processor damage */ + __u32 ck : 1; /* 11 channel-subsystem damage */ + __u32 to_be_defined_3 : 2; /* 12-13 */ + __u32 b : 1; /* 14 backed up */ + __u32 to_be_defined_4 : 1; /* 15 */ __u32 se : 1; /* 16 storage error uncorrected */ __u32 sc : 1; /* 17 storage error corrected */ __u32 ke : 1; /* 18 storage-key error uncorrected */ __u32 ds : 1; /* 19 storage degradation */ - __u32 to_be_defined_3 : 4; /* 20-23 */ + __u32 wp : 1; /* 20 psw mwp validity */ + __u32 ms : 1; /* 21 psw mask and key validity */ + __u32 pm : 1; /* 22 psw program mask and cc validity */ + __u32 ia : 1; /* 23 psw instruction address validity */ __u32 fa : 1; /* 24 failing storage address validity */ - __u32 to_be_defined_4 : 7; /* 25-31 */ + __u32 to_be_defined_5 : 1; /* 25 */ + __u32 ec : 1; /* 26 external damage code validity */ + __u32 fp : 1; /* 27 floating point register validity */ + __u32 gr : 1; /* 28 general register validity */ + __u32 cr : 1; /* 29 control register validity */ + __u32 to_be_defined_6 : 1; /* 30 */ + __u32 st : 1; /* 31 storage logical validity */ __u32 ie : 1; /* 32 indirect storage error */ - __u32 to_be_defined_5 : 31; /* 33-63 */ + __u32 ar : 1; /* 33 access register validity */ + __u32 da : 1; /* 34 delayed access exception */ + __u32 to_be_defined_7 : 7; /* 35-41 */ + __u32 pr : 1; /* 42 tod programmable register validity */ + __u32 fc : 1; /* 43 fp control register validity */ + __u32 ap : 1; /* 44 ancillary report */ + __u32 to_be_defined_8 : 1; /* 45 */ + __u32 ct : 1; /* 46 cpu timer validity */ + __u32 cc : 1; /* 47 clock comparator validity */ + __u32 to_be_defined_9 : 16; /* 47-63 */ }; /* diff --git a/include/asm-s390/lowcore.h 
b/include/asm-s390/lowcore.h index df5172fc589..76b5b19c0ae 100644 --- a/include/asm-s390/lowcore.h +++ b/include/asm-s390/lowcore.h @@ -109,10 +109,14 @@ #ifndef __s390x__ #define __LC_PFAULT_INTPARM 0x080 +#define __LC_CPU_TIMER_SAVE_AREA 0x0D8 #define __LC_AREGS_SAVE_AREA 0x120 +#define __LC_GPREGS_SAVE_AREA 0x180 #define __LC_CREGS_SAVE_AREA 0x1C0 #else /* __s390x__ */ #define __LC_PFAULT_INTPARM 0x11B8 +#define __LC_GPREGS_SAVE_AREA 0x1280 +#define __LC_CPU_TIMER_SAVE_AREA 0x1328 #define __LC_AREGS_SAVE_AREA 0x1340 #define __LC_CREGS_SAVE_AREA 0x1380 #endif /* __s390x__ */ @@ -167,7 +171,8 @@ struct _lowcore __u16 subchannel_nr; /* 0x0ba */ __u32 io_int_parm; /* 0x0bc */ __u32 io_int_word; /* 0x0c0 */ - __u8 pad3[0xD8-0xC4]; /* 0x0c4 */ + __u8 pad3[0xD4-0xC4]; /* 0x0c4 */ + __u32 extended_save_area_addr; /* 0x0d4 */ __u32 cpu_timer_save_area[2]; /* 0x0d8 */ __u32 clock_comp_save_area[2]; /* 0x0e0 */ __u32 mcck_interruption_code[2]; /* 0x0e8 */ diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index fb46e9090b5..8bd14de69e3 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h @@ -206,6 +206,18 @@ unsigned long get_wchan(struct task_struct *p); asm volatile ("ex 0,%0" : : "i" (__LC_DIAG44_OPCODE) : "memory") #endif /* __s390x__ */ +/* + * Set PSW to specified value. + */ +static inline void __load_psw(psw_t psw) +{ +#ifndef __s390x__ + asm volatile ("lpsw 0(%0)" : : "a" (&psw), "m" (psw) : "cc" ); +#else + asm volatile ("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc" ); +#endif +} + /* * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. @@ -214,8 +226,8 @@ unsigned long get_wchan(struct task_struct *p); static inline void __load_psw_mask (unsigned long mask) { unsigned long addr; - psw_t psw; + psw.mask = mask; #ifndef __s390x__ @@ -241,30 +253,8 @@ static inline void __load_psw_mask (unsigned long mask) */ static inline void enabled_wait(void) { - unsigned long reg; - psw_t wait_psw; - - wait_psw.mask = PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | - PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY; -#ifndef __s390x__ - asm volatile ( - " basr %0,0\n" - "0: la %0,1f-0b(%0)\n" - " st %0,4(%1)\n" - " oi 4(%1),0x80\n" - " lpsw 0(%1)\n" - "1:" - : "=&a" (reg) : "a" (&wait_psw), "m" (wait_psw) - : "memory", "cc" ); -#else /* __s390x__ */ - asm volatile ( - " larl %0,0f\n" - " stg %0,8(%1)\n" - " lpswe 0(%1)\n" - "0:" - : "=&a" (reg) : "a" (&wait_psw), "m" (wait_psw) - : "memory", "cc" ); -#endif /* __s390x__ */ + __load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | + PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY); } /* @@ -273,13 +263,11 @@ static inline void enabled_wait(void) static inline void disabled_wait(unsigned long code) { - char psw_buffer[2*sizeof(psw_t)]; unsigned long ctl_buf; - psw_t *dw_psw = (psw_t *)(((unsigned long) &psw_buffer+sizeof(psw_t)-1) - & -sizeof(psw_t)); + psw_t dw_psw; - dw_psw->mask = PSW_BASE_BITS | PSW_MASK_WAIT; - dw_psw->addr = code; + dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + dw_psw.addr = code; /* * Store status and then load disabled wait psw, * the processor is dead afterwards @@ -301,7 +289,7 @@ static inline void disabled_wait(unsigned long code) " oi 0x1c0,0x10\n" /* fake protection bit */ " lpsw 0(%1)" : "=m" (ctl_buf) - : "a" (dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc" ); + : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc" ); #else /* __s390x__ */ asm volatile (" stctg 0,0,0(%2)\n" " ni 4(%2),0xef\n" /* switch off protection */ @@ 
-333,7 +321,7 @@ static inline void disabled_wait(unsigned long code) " oi 0x384(1),0x10\n" /* fake protection bit */ " lpswe 0(%1)" : "=m" (ctl_buf) - : "a" (dw_psw), "a" (&ctl_buf), + : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1"); #endif /* __s390x__ */ } diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h index 4eff8f2e3bf..fc7c96edc69 100644 --- a/include/asm-s390/ptrace.h +++ b/include/asm-s390/ptrace.h @@ -276,7 +276,7 @@ typedef struct #endif /* __s390x__ */ #define PSW_KERNEL_BITS (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | \ - PSW_DEFAULT_KEY) + PSW_MASK_MCHECK | PSW_DEFAULT_KEY) #define PSW_USER_BITS (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | \ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | \ PSW_MASK_PSTATE | PSW_DEFAULT_KEY) diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index 81514d76edc..e3cb3ce1d24 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef __KERNEL__ @@ -331,9 +332,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #ifdef __s390x__ -#define __load_psw(psw) \ - __asm__ __volatile__("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc" ); - #define __ctl_load(array, low, high) ({ \ typedef struct { char _[sizeof(array)]; } addrtype; \ __asm__ __volatile__ ( \ @@ -390,9 +388,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #else /* __s390x__ */ -#define __load_psw(psw) \ - __asm__ __volatile__("lpsw 0(%0)" : : "a" (&psw) : "cc" ); - #define __ctl_load(array, low, high) ({ \ typedef struct { char _[sizeof(array)]; } addrtype; \ __asm__ __volatile__ ( \ @@ -451,6 +446,20 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) /* For spinlocks etc */ #define local_irq_save(x) ((x) = local_irq_disable()) +/* + * Use to set psw mask except for the first byte which + * won't be changed by this function. + */ +static inline void +__set_psw_mask(unsigned long mask) +{ + local_save_flags(mask); + __load_psw_mask(mask); +} + +#define local_mcck_enable() __set_psw_mask(PSW_KERNEL_BITS) +#define local_mcck_disable() __set_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK) + #ifdef CONFIG_SMP extern void smp_ctl_set_bit(int cr, int bit); diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h index fe101d41e84..6c18a3f2431 100644 --- a/include/asm-s390/thread_info.h +++ b/include/asm-s390/thread_info.h @@ -96,6 +96,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTART_SVC 4 /* restart svc with new svc number */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SINGLE_STEP 6 /* deliver sigtrap on return to user */ +#define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -109,6 +110,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_RESTART_SVC (1< Date: Sat, 25 Jun 2005 14:55:32 -0700 Subject: [PATCH] s390: add vmcp interface Add interface to issue VM control program commands. 
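For illustration only, a minimal sketch of how in-kernel code might call the reworked cpcmd() interface this patch introduces (command string, response buffer, buffer length, and a pointer that receives CP's return code). The command string, buffer size and the helper name are invented for the example; the truncation check relies on the documented convention that the return value is the size of the full response:

#include <linux/kernel.h>
#include <asm/cpcmd.h>

/*
 * Hedged usage sketch, not part of the patch: issue a CP command,
 * check CP's return code and detect a truncated response.
 */
static void cpcmd_usage_example(void)
{
	char response[1024];
	int cp_rc, len;

	/* leave one byte spare so the reply can always be NUL-terminated */
	len = cpcmd("QUERY USERS", response, sizeof(response) - 1, &cp_rc);
	if (len < 0) {
		printk(KERN_WARNING "cpcmd failed: %d\n", len);
		return;
	}
	/* guarantee termination even if CP's reply fills the whole buffer */
	response[sizeof(response) - 1] = '\0';
	if (cp_rc != 0)
		printk(KERN_INFO "CP returned code %d\n", cp_rc);
	if (len > (int) (sizeof(response) - 1))
		printk(KERN_INFO "response truncated, %d bytes needed\n", len);
	else
		printk(KERN_INFO "CP response: %s\n", response);
}

A caller whose response buffer may sit above 2 GB uses cpcmd() rather than __cpcmd(), since cpcmd() bounces the reply through a GFP_DMA buffer; __cpcmd() requires the caller to guarantee the buffer is below 2 GB.
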
Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/defconfig | 1 + arch/s390/kernel/cpcmd.c | 109 ++++++++++++--------- arch/s390/kernel/setup.c | 6 +- arch/s390/kernel/smp.c | 6 +- arch/s390/kernel/traps.c | 2 +- arch/s390/mm/extmem.c | 4 +- drivers/s390/Kconfig | 7 ++ drivers/s390/char/Makefile | 1 + drivers/s390/char/con3215.c | 4 +- drivers/s390/char/con3270.c | 4 +- drivers/s390/char/vmcp.c | 219 +++++++++++++++++++++++++++++++++++++++++++ drivers/s390/char/vmcp.h | 30 ++++++ drivers/s390/char/vmlogrdr.c | 10 +- drivers/s390/net/smsgiucv.c | 4 +- include/asm-s390/cpcmd.h | 18 +++- 15 files changed, 359 insertions(+), 66 deletions(-) create mode 100644 drivers/s390/char/vmcp.c create mode 100644 drivers/s390/char/vmcp.h diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 07fd0414a4b..89850b2c27e 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -245,6 +245,7 @@ CONFIG_S390_TAPE_BLOCK=y # CONFIG_S390_TAPE_34XX=m # CONFIG_VMLOGRDR is not set +# CONFIG_VMCP is not set # CONFIG_MONREADER is not set # CONFIG_DCSS_SHM is not set diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 44df8dc07c5..20062145e84 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -2,7 +2,7 @@ * arch/s390/kernel/cpcmd.c * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) 1999,2005 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Christian Borntraeger (cborntra@de.ibm.com), */ @@ -18,93 +18,114 @@ #include static DEFINE_SPINLOCK(cpcmd_lock); -static char cpcmd_buf[240]; +static char cpcmd_buf[241]; /* * the caller of __cpcmd has to ensure that the response buffer is below 2 GB */ -void __cpcmd(char *cmd, char *response, int rlen) +int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) { const int mask = 0x40000000L; unsigned long flags; + int return_code; + int return_len; int cmdlen; spin_lock_irqsave(&cpcmd_lock, flags); cmdlen = strlen(cmd); BUG_ON(cmdlen > 240); - strcpy(cpcmd_buf, cmd); + memcpy(cpcmd_buf, cmd, cmdlen); ASCEBC(cpcmd_buf, cmdlen); if (response != NULL && rlen > 0) { memset(response, 0, rlen); #ifndef CONFIG_ARCH_S390X - asm volatile ("LRA 2,0(%0)\n\t" - "LR 4,%1\n\t" - "O 4,%4\n\t" - "LRA 3,0(%2)\n\t" - "LR 5,%3\n\t" - ".long 0x83240008 # Diagnose X'08'\n\t" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), "d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); + asm volatile ( "lra 2,0(%2)\n" + "lr 4,%3\n" + "o 4,%6\n" + "lra 3,0(%4)\n" + "lr 5,%5\n" + "diag 2,4,0x8\n" + "brc 8, .Litfits\n" + "ar 5, %5\n" + ".Litfits: \n" + "lr %0,4\n" + "lr %1,5\n" + : "=d" (return_code), "=d" (return_len) + : "a" (cpcmd_buf), "d" (cmdlen), + "a" (response), "d" (rlen), "m" (mask) + : "cc", "2", "3", "4", "5" ); #else /* CONFIG_ARCH_S390X */ - asm volatile (" lrag 2,0(%0)\n" - " lgr 4,%1\n" - " o 4,%4\n" - " lrag 3,0(%2)\n" - " lgr 5,%3\n" - " sam31\n" - " .long 0x83240008 # Diagnose X'08'\n" - " sam64" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), "d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); + asm volatile ( "lrag 2,0(%2)\n" + "lgr 4,%3\n" + "o 4,%6\n" + "lrag 3,0(%4)\n" + "lgr 5,%5\n" + "sam31\n" + "diag 2,4,0x8\n" + "sam64\n" + "brc 8, .Litfits\n" + "agr 5, %5\n" + ".Litfits: \n" + "lgr %0,4\n" + "lgr %1,5\n" + : "=d" (return_code), "=d" (return_len) + : "a" (cpcmd_buf), "d" (cmdlen), + "a" 
(response), "d" (rlen), "m" (mask) + : "cc", "2", "3", "4", "5" ); #endif /* CONFIG_ARCH_S390X */ EBCASC(response, rlen); } else { + return_len = 0; #ifndef CONFIG_ARCH_S390X - asm volatile ("LRA 2,0(%0)\n\t" - "LR 3,%1\n\t" - ".long 0x83230008 # Diagnose X'08'\n\t" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); + asm volatile ( "lra 2,0(%1)\n" + "lr 3,%2\n" + "diag 2,3,0x8\n" + "lr %0,3\n" + : "=d" (return_code) + : "a" (cpcmd_buf), "d" (cmdlen) + : "2", "3" ); #else /* CONFIG_ARCH_S390X */ - asm volatile (" lrag 2,0(%0)\n" - " lgr 3,%1\n" - " sam31\n" - " .long 0x83230008 # Diagnose X'08'\n" - " sam64" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); + asm volatile ( "lrag 2,0(%1)\n" + "lgr 3,%2\n" + "sam31\n" + "diag 2,3,0x8\n" + "sam64\n" + "lgr %0,3\n" + : "=d" (return_code) + : "a" (cpcmd_buf), "d" (cmdlen) + : "2", "3" ); #endif /* CONFIG_ARCH_S390X */ } spin_unlock_irqrestore(&cpcmd_lock, flags); + if (response_code != NULL) + *response_code = return_code; + return return_len; } EXPORT_SYMBOL(__cpcmd); #ifdef CONFIG_ARCH_S390X -void cpcmd(char *cmd, char *response, int rlen) +int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { char *lowbuf; + int len; + if ((rlen == 0) || (response == NULL) || !((unsigned long)response >> 31)) - __cpcmd(cmd, response, rlen); + len = __cpcmd(cmd, response, rlen, response_code); else { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { printk(KERN_WARNING "cpcmd: could not allocate response buffer\n"); - return; + return -ENOMEM; } - __cpcmd(cmd, lowbuf, rlen); + len = __cpcmd(cmd, lowbuf, rlen, response_code); memcpy(response, lowbuf, rlen); kfree(lowbuf); } + return len; } EXPORT_SYMBOL(cpcmd); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index eb7be0ad717..b6d740ac0e6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -198,11 +198,11 @@ static void __init conmode_default(void) char *ptr; if (MACHINE_IS_VM) { - __cpcmd("QUERY CONSOLE", query_buffer, 1024); + __cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); console_irq = simple_strtoul(ptr + 13, NULL, 16); - __cpcmd("QUERY TERM", query_buffer, 1024); + __cpcmd("QUERY TERM", query_buffer, 1024, NULL); ptr = strstr(query_buffer, "CONMODE"); /* * Set the conmode to 3215 so that the device recognition @@ -211,7 +211,7 @@ static void __init conmode_default(void) * 3215 and the 3270 driver will try to access the console * device (3215 as console and 3270 as normal tty). */ - __cpcmd("TERM CONMODE 3215", NULL, 0); + __cpcmd("TERM CONMODE 3215", NULL, 0, NULL); if (ptr == NULL) { #if defined(CONFIG_SCLP_CONSOLE) SET_CONSOLE_SCLP; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 50c335067cf..642572a8e33 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -284,7 +284,7 @@ static void do_machine_restart(void * __unused) * locks are always held disabled). 
*/ if (MACHINE_IS_VM) - cpcmd ("IPL", NULL, 0); + cpcmd ("IPL", NULL, 0, NULL); else reipl (0x10000 | S390_lowcore.ipl_device); } @@ -313,7 +313,7 @@ static void do_machine_halt(void * __unused) if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid) == 0) { smp_send_stop(); if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0); + cpcmd(vmhalt_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } @@ -332,7 +332,7 @@ static void do_machine_power_off(void * __unused) if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid) == 0) { smp_send_stop(); if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0); + cpcmd(vmpoff_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ca34b6f34b3..bc7b7be7acb 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -735,7 +735,7 @@ void __init trap_init(void) &ext_int_pfault); #endif #ifndef CONFIG_ARCH_S390X - cpcmd("SET PAGEX ON", NULL, 0); + cpcmd("SET PAGEX ON", NULL, 0, NULL); #endif } } diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 648deed17e2..c5348108ca3 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -576,8 +576,8 @@ segment_save(char *name) segtype_string[seg->range[i].start & 0xff]); } sprintf(cmd2, "SAVESEG %s", name); - cpcmd(cmd1, NULL, 0); - cpcmd(cmd2, NULL, 0); + cpcmd(cmd1, NULL, 0, NULL); + cpcmd(cmd2, NULL, 0, NULL); spin_unlock(&dcss_lock); } diff --git a/drivers/s390/Kconfig b/drivers/s390/Kconfig index 96413c2cd1a..a86a650f3d6 100644 --- a/drivers/s390/Kconfig +++ b/drivers/s390/Kconfig @@ -187,6 +187,13 @@ config VMLOGRDR *SYMPTOM. This driver depends on the IUCV support driver. 
+config VMCP + tristate "Support for the z/VM CP interface (VM only)" + help + Select this option if you want to be able to interact with the control + program on z/VM + + config MONREADER tristate "API for reading z/VM monitor service records" depends on IUCV diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 14e8cce9f86..6377a96735d 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_SCLP_CPI) += sclp_cpi.o obj-$(CONFIG_ZVM_WATCHDOG) += vmwatchdog.o obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o +obj-$(CONFIG_VMCP) += vmcp.o tape-$(CONFIG_S390_TAPE_BLOCK) += tape_block.o tape-$(CONFIG_PROC_FS) += tape_proc.o diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 022f17bff73..f11a67fda40 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -860,8 +860,8 @@ con3215_init(void) /* Set the console mode for VM */ if (MACHINE_IS_VM) { - cpcmd("TERM CONMODE 3215", NULL, 0); - cpcmd("TERM AUTOCR OFF", NULL, 0); + cpcmd("TERM CONMODE 3215", NULL, 0, NULL); + cpcmd("TERM AUTOCR OFF", NULL, 0, NULL); } /* allocate 3215 request structures */ diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index d52fb57a6b1..fc7a213e591 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -591,8 +591,8 @@ con3270_init(void) /* Set the console mode for VM */ if (MACHINE_IS_VM) { - cpcmd("TERM CONMODE 3270", 0, 0); - cpcmd("TERM AUTOCR OFF", 0, 0); + cpcmd("TERM CONMODE 3270", NULL, 0, NULL); + cpcmd("TERM AUTOCR OFF", NULL, 0, NULL); } cdev = ccw_device_probe_console(); diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c new file mode 100644 index 00000000000..dfbbf235ca2 --- /dev/null +++ b/drivers/s390/char/vmcp.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2004,2005 IBM Corporation + * Interface implementation for communication with the v/VM control program + * Author(s): Christian Borntraeger + * + * + * z/VMs CP offers the possibility to issue commands via the diagnose code 8 + * this driver implements a character device that issues these commands and + * returns the answer of CP. 
+ + * The idea of this driver is based on cpint from Neale Ferguson and #CP in CMS + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "vmcp.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Borntraeger "); +MODULE_DESCRIPTION("z/VM CP interface"); + +static debug_info_t *vmcp_debug; + +static int vmcp_open(struct inode *inode, struct file *file) +{ + struct vmcp_session *session; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + session = kmalloc(sizeof(*session), GFP_KERNEL); + if (!session) + return -ENOMEM; + session->bufsize = PAGE_SIZE; + session->response = NULL; + session->resp_size = 0; + init_MUTEX(&session->mutex); + file->private_data = session; + return nonseekable_open(inode, file); +} + +static int vmcp_release(struct inode *inode, struct file *file) +{ + struct vmcp_session *session; + + session = (struct vmcp_session *)file->private_data; + file->private_data = NULL; + free_pages((unsigned long)session->response, get_order(session->bufsize)); + kfree(session); + return 0; +} + +static ssize_t +vmcp_read(struct file *file, char __user * buff, size_t count, loff_t * ppos) +{ + size_t tocopy; + struct vmcp_session *session; + + session = (struct vmcp_session *)file->private_data; + if (down_interruptible(&session->mutex)) + return -ERESTARTSYS; + if (!session->response) { + up(&session->mutex); + return 0; + } + if (*ppos > session->resp_size) { + up(&session->mutex); + return 0; + } + tocopy = min(session->resp_size - (size_t) (*ppos), count); + tocopy = min(tocopy,session->bufsize - (size_t) (*ppos)); + + if (copy_to_user(buff, session->response + (*ppos), tocopy)) { + up(&session->mutex); + return -EFAULT; + } + up(&session->mutex); + *ppos += tocopy; + return tocopy; +} + +static ssize_t +vmcp_write(struct file *file, const char __user * buff, size_t count, + loff_t * ppos) +{ + char *cmd; + struct vmcp_session *session; + + if (count > 240) + return -EINVAL; + cmd = kmalloc(count + 1, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + if (copy_from_user(cmd, buff, count)) { + kfree(cmd); + return -EFAULT; + } + cmd[count] = '\0'; + session = (struct vmcp_session *)file->private_data; + if (down_interruptible(&session->mutex)) + return -ERESTARTSYS; + if (!session->response) + session->response = (char *)__get_free_pages(GFP_KERNEL + | __GFP_REPEAT | GFP_DMA, + get_order(session->bufsize)); + if (!session->response) { + up(&session->mutex); + kfree(cmd); + return -ENOMEM; + } + debug_text_event(vmcp_debug, 1, cmd); + session->resp_size = cpcmd(cmd, session->response, + session->bufsize, + &session->resp_code); + up(&session->mutex); + kfree(cmd); + *ppos = 0; /* reset the file pointer after a command */ + return count; +} + + +/* + * These ioctls are available, as the semantics of the diagnose 8 call + * does not fit very well into a Linux call. Diagnose X'08' is described in + * CP Programming Services SC24-6084-00 + * + * VMCP_GETCODE: gives the CP return code back to user space + * VMCP_SETBUF: sets the response buffer for the next write call. diagnose 8 + * expects adjacent pages in real storage and to make matters worse, we + * dont know the size of the response. 
Therefore we default to PAGESIZE and + * let userspace to change the response size, if userspace expects a bigger + * response + */ +static long vmcp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct vmcp_session *session; + int temp; + + session = (struct vmcp_session *)file->private_data; + if (down_interruptible(&session->mutex)) + return -ERESTARTSYS; + switch (cmd) { + case VMCP_GETCODE: + temp = session->resp_code; + up(&session->mutex); + return put_user(temp, (int __user *)arg); + case VMCP_SETBUF: + free_pages((unsigned long)session->response, + get_order(session->bufsize)); + session->response=NULL; + temp = get_user(session->bufsize, (int __user *)arg); + if (get_order(session->bufsize) > 8) { + session->bufsize = PAGE_SIZE; + temp = -EINVAL; + } + up(&session->mutex); + return temp; + case VMCP_GETSIZE: + temp = session->resp_size; + up(&session->mutex); + return put_user(temp, (int __user *)arg); + default: + up(&session->mutex); + return -ENOIOCTLCMD; + } +} + +static struct file_operations vmcp_fops = { + .owner = THIS_MODULE, + .open = &vmcp_open, + .release = &vmcp_release, + .read = &vmcp_read, + .llseek = &no_llseek, + .write = &vmcp_write, + .unlocked_ioctl = &vmcp_ioctl, + .compat_ioctl = &vmcp_ioctl +}; + +static struct miscdevice vmcp_dev = { + .name = "vmcp", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vmcp_fops, +}; + +static int __init vmcp_init(void) +{ + int ret; + + if (!MACHINE_IS_VM) { + printk(KERN_WARNING + "z/VM CP interface is only available under z/VM\n"); + return -ENODEV; + } + ret = misc_register(&vmcp_dev); + if (!ret) + printk(KERN_INFO "z/VM CP interface loaded\n"); + else + printk(KERN_WARNING + "z/VM CP interface not loaded. Could not register misc device.\n"); + vmcp_debug = debug_register("vmcp", 0, 1, 240); + debug_register_view(vmcp_debug, &debug_hex_ascii_view); + return ret; +} + +static void __exit vmcp_exit(void) +{ + WARN_ON(misc_deregister(&vmcp_dev) != 0); + debug_unregister(vmcp_debug); + printk(KERN_INFO "z/VM CP interface unloaded.\n"); +} + +module_init(vmcp_init); +module_exit(vmcp_exit); diff --git a/drivers/s390/char/vmcp.h b/drivers/s390/char/vmcp.h new file mode 100644 index 00000000000..87389e73046 --- /dev/null +++ b/drivers/s390/char/vmcp.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2004, 2005 IBM Corporation + * Interface implementation for communication with the v/VM control program + * Version 1.0 + * Author(s): Christian Borntraeger + * + * + * z/VMs CP offers the possibility to issue commands via the diagnose code 8 + * this driver implements a character device that issues these commands and + * returns the answer of CP. 
+ * + * The idea of this driver is based on cpint from Neale Ferguson + */ + +#include +#include + +#define VMCP_GETCODE _IOR(0x10, 1, int) +#define VMCP_SETBUF _IOW(0x10, 2, int) +#define VMCP_GETSIZE _IOR(0x10, 3, int) + +struct vmcp_session { + unsigned int bufsize; + char *response; + int resp_size; + int resp_code; + /* As we use copy_from/to_user, which might * + * sleep and cannot use a spinlock */ + struct semaphore mutex; +}; diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index f7717327d15..491f00c032e 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -236,7 +236,7 @@ vmlogrdr_get_recording_class_AB(void) { int len,i; printk (KERN_DEBUG "vmlogrdr: query command: %s\n", cp_command); - cpcmd(cp_command, cp_response, sizeof(cp_response)); + cpcmd(cp_command, cp_response, sizeof(cp_response), NULL); printk (KERN_DEBUG "vmlogrdr: response: %s", cp_response); len = strnlen(cp_response,sizeof(cp_response)); // now the parsing @@ -288,7 +288,7 @@ vmlogrdr_recording(struct vmlogrdr_priv_t * logptr, int action, int purge) { printk (KERN_DEBUG "vmlogrdr: recording command: %s\n", cp_command); - cpcmd(cp_command, cp_response, sizeof(cp_response)); + cpcmd(cp_command, cp_response, sizeof(cp_response), NULL); printk (KERN_DEBUG "vmlogrdr: recording response: %s", cp_response); } @@ -301,7 +301,7 @@ vmlogrdr_recording(struct vmlogrdr_priv_t * logptr, int action, int purge) { qid_string); printk (KERN_DEBUG "vmlogrdr: recording command: %s\n", cp_command); - cpcmd(cp_command, cp_response, sizeof(cp_response)); + cpcmd(cp_command, cp_response, sizeof(cp_response), NULL); printk (KERN_DEBUG "vmlogrdr: recording response: %s", cp_response); /* The recording command will usually answer with 'Command complete' @@ -607,7 +607,7 @@ vmlogrdr_purge_store(struct device * dev, struct device_attribute *attr, const c priv->recording_name); printk (KERN_DEBUG "vmlogrdr: recording command: %s\n", cp_command); - cpcmd(cp_command, cp_response, sizeof(cp_response)); + cpcmd(cp_command, cp_response, sizeof(cp_response), NULL); printk (KERN_DEBUG "vmlogrdr: recording response: %s", cp_response); @@ -682,7 +682,7 @@ vmlogrdr_recording_status_show(struct device_driver *driver, char *buf) { char cp_command[] = "QUERY RECORDING "; int len; - cpcmd(cp_command, buf, 4096); + cpcmd(cp_command, buf, 4096, NULL); len = strlen(buf); return len; } diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index 1e3f7f3c662..d6469baa7e1 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -138,7 +138,7 @@ static void __exit smsg_exit(void) { if (smsg_handle > 0) { - cpcmd("SET SMSG OFF", 0, 0); + cpcmd("SET SMSG OFF", NULL, 0, NULL); iucv_sever(smsg_pathid, 0); iucv_unregister_program(smsg_handle); driver_unregister(&smsg_driver); @@ -177,7 +177,7 @@ smsg_init(void) smsg_handle = 0; return -EIO; } - cpcmd("SET SMSG IUCV", 0, 0); + cpcmd("SET SMSG IUCV", NULL, 0, NULL); return 0; } diff --git a/include/asm-s390/cpcmd.h b/include/asm-s390/cpcmd.h index 1d33c5da083..1fcf65be7a2 100644 --- a/include/asm-s390/cpcmd.h +++ b/include/asm-s390/cpcmd.h @@ -11,14 +11,28 @@ #define __CPCMD__ /* + * the lowlevel function for cpcmd * the caller of __cpcmd has to ensure that the response buffer is below 2 GB */ -extern void __cpcmd(char *cmd, char *response, int rlen); +extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code); #ifndef __s390x__ #define cpcmd __cpcmd #else -extern void cpcmd(char *cmd, char *response, int 
rlen); +/* + * cpcmd is the in-kernel interface for issuing CP commands + * + * cmd: null-terminated command string, max 240 characters + * response: response buffer for VM's textual response + * rlen: size of the response buffer, cpcmd will not exceed this size + * but will cap the output, if its too large. Everything that + * did not fit into the buffer will be silently dropped + * response_code: return pointer for VM's error code + * return value: the size of the response. The caller can check if the buffer + * was large enough by comparing the return value and rlen + * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep + */ +extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code); #endif /*__s390x__*/ #endif -- cgit v1.2.3 From 66a464dbc8e0345b6f972b92bf1118e043d7c987 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sat, 25 Jun 2005 14:55:33 -0700 Subject: [PATCH] s390: debug feature changes This patch changes the memory allocation method for the s390 debug feature. Trace buffers had been allocated using the get_free_pages() function before. Therefore it was not possible to get big memory areas in a running system due to memory fragmentation. Now the trace buffers are subdivided into several subbuffers with pagesize. Therefore it is now possible to allocate more memory for the trace buffers and more trace records can be written. In addition to that, dynamic specification of the size of the trace buffers is implemented. It is now possible to change the size of a trace buffer using a new debugfs file instance. When writing a number into this file, the trace buffer size is changed to 'number * pagesize'. In the past all the traces could be obtained from userspace by accessing files in the "proc" filesystem. Now with debugfs we have a new filesystem which should be used for debugging purposes. This patch moves the debug feature from procfs to debugfs. Since the interface of debug_register() changed, all device drivers, which use the debug feature had to be adjusted. Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/s390/s390dbf.txt | 79 ++-- arch/s390/kernel/debug.c | 820 +++++++++++++++++++++++++++-------------- drivers/s390/block/dasd.c | 4 +- drivers/s390/block/dasd_proc.c | 3 +- drivers/s390/char/tape_34xx.c | 6 +- drivers/s390/char/tape_core.c | 2 +- drivers/s390/char/tape_proc.c | 1 + drivers/s390/char/vmcp.c | 2 +- drivers/s390/cio/cio.c | 8 +- drivers/s390/cio/qdio.c | 14 +- drivers/s390/cio/qdio.h | 16 +- drivers/s390/net/claw.c | 4 +- drivers/s390/net/ctcdbug.c | 10 +- drivers/s390/net/ctcdbug.h | 10 +- drivers/s390/net/iucv.h | 6 +- drivers/s390/net/lcs.c | 8 +- drivers/s390/net/netiucv.c | 12 +- drivers/s390/net/qeth.h | 14 +- drivers/s390/net/qeth_main.c | 14 +- include/asm-s390/debug.h | 48 ++- 20 files changed, 678 insertions(+), 403 deletions(-) diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt index 2d1cd939b4d..e24fdeada97 100644 --- a/Documentation/s390/s390dbf.txt +++ b/Documentation/s390/s390dbf.txt @@ -12,8 +12,8 @@ where log records can be stored efficiently in memory, where each component One purpose of this is to inspect the debug logs after a production system crash in order to analyze the reason for the crash. If the system still runs but only a subcomponent which uses dbf failes, -it is possible to look at the debug logs on a live system via the Linux proc -filesystem. 
+it is possible to look at the debug logs on a live system via the Linux +debugfs filesystem. The debug feature may also very useful for kernel and driver development. Design: @@ -52,16 +52,18 @@ Each debug entry contains the following data: - Flag, if entry is an exception or not The debug logs can be inspected in a live system through entries in -the proc-filesystem. Under the path /proc/s390dbf there is +the debugfs-filesystem. Under the toplevel directory "s390dbf" there is a directory for each registered component, which is named like the -corresponding component. +corresponding component. The debugfs normally should be mounted to +/sys/kernel/debug therefore the debug feature can be accessed unter +/sys/kernel/debug/s390dbf. The content of the directories are files which represent different views to the debug log. Each component can decide which views should be used through registering them with the function debug_register_view(). Predefined views for hex/ascii, sprintf and raw binary data are provided. It is also possible to define other views. The content of -a view can be inspected simply by reading the corresponding proc file. +a view can be inspected simply by reading the corresponding debugfs file. All debug logs have an an actual debug level (range from 0 to 6). The default level is 3. Event and Exception functions have a 'level' @@ -69,14 +71,14 @@ parameter. Only debug entries with a level that is lower or equal than the actual level are written to the log. This means, when writing events, high priority log entries should have a low level value whereas low priority entries should have a high one. -The actual debug level can be changed with the help of the proc-filesystem -through writing a number string "x" to the 'level' proc file which is +The actual debug level can be changed with the help of the debugfs-filesystem +through writing a number string "x" to the 'level' debugfs file which is provided for every debug log. Debugging can be switched off completely -by using "-" on the 'level' proc file. +by using "-" on the 'level' debugfs file. Example: -> echo "-" > /proc/s390dbf/dasd/level +> echo "-" > /sys/kernel/debug/s390dbf/dasd/level It is also possible to deactivate the debug feature globally for every debug log. You can change the behavior using 2 sysctl parameters in @@ -99,11 +101,11 @@ Kernel Interfaces: ------------------ ---------------------------------------------------------------------------- -debug_info_t *debug_register(char *name, int pages_index, int nr_areas, +debug_info_t *debug_register(char *name, int pages, int nr_areas, int buf_size); -Parameter: name: Name of debug log (e.g. used for proc entry) - pages_index: 2^pages_index pages will be allocated per area +Parameter: name: Name of debug log (e.g. used for debugfs entry) + pages: number of pages, which will be allocated per area nr_areas: number of debug areas buf_size: size of data area in each debug entry @@ -134,7 +136,7 @@ Return Value: none Description: Sets new actual debug level if new_level is valid. 
--------------------------------------------------------------------------- -+void debug_stop_all(void); +void debug_stop_all(void); Parameter: none @@ -270,7 +272,7 @@ Parameter: id: handle for debug log Return Value: 0 : ok < 0: Error -Description: registers new debug view and creates proc dir entry +Description: registers new debug view and creates debugfs dir entry --------------------------------------------------------------------------- int debug_unregister_view (debug_info_t * id, struct debug_view *view); @@ -281,7 +283,7 @@ Parameter: id: handle for debug log Return Value: 0 : ok < 0: Error -Description: unregisters debug view and removes proc dir entry +Description: unregisters debug view and removes debugfs dir entry @@ -308,7 +310,7 @@ static int init(void) { /* register 4 debug areas with one page each and 4 byte data field */ - debug_info = debug_register ("test", 0, 4, 4 ); + debug_info = debug_register ("test", 1, 4, 4 ); debug_register_view(debug_info,&debug_hex_ascii_view); debug_register_view(debug_info,&debug_raw_view); @@ -343,7 +345,7 @@ static int init(void) /* register 4 debug areas with one page each and data field for */ /* format string pointer + 2 varargs (= 3 * sizeof(long)) */ - debug_info = debug_register ("test", 0, 4, sizeof(long) * 3); + debug_info = debug_register ("test", 1, 4, sizeof(long) * 3); debug_register_view(debug_info,&debug_sprintf_view); debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__); @@ -362,16 +364,16 @@ module_exit(cleanup); -ProcFS Interface +Debugfs Interface ---------------- Views to the debug logs can be investigated through reading the corresponding -proc-files: +debugfs-files: Example: -> ls /proc/s390dbf/dasd -flush hex_ascii level raw -> cat /proc/s390dbf/dasd/hex_ascii | sort +1 +> ls /sys/kernel/debug/s390dbf/dasd +flush hex_ascii level pages raw +> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort +1 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | .... 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | .... @@ -391,25 +393,36 @@ Changing the debug level Example: -> cat /proc/s390dbf/dasd/level +> cat /sys/kernel/debug/s390dbf/dasd/level 3 -> echo "5" > /proc/s390dbf/dasd/level -> cat /proc/s390dbf/dasd/level +> echo "5" > /sys/kernel/debug/s390dbf/dasd/level +> cat /sys/kernel/debug/s390dbf/dasd/level 5 Flushing debug areas -------------------- Debug areas can be flushed with piping the number of the desired -area (0...n) to the proc file "flush". When using "-" all debug areas +area (0...n) to the debugfs file "flush". When using "-" all debug areas are flushed. Examples: 1. Flush debug area 0: -> echo "0" > /proc/s390dbf/dasd/flush +> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush 2. Flush all debug areas: -> echo "-" > /proc/s390dbf/dasd/flush +> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush + +Changing the size of debug areas +------------------------------------ +It is possible the change the size of debug areas through piping +the number of pages to the debugfs file "pages". The resize request will +also flush the debug areas. + +Example: + +Define 4 pages for the debug areas of debug feature "dasd": +> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages Stooping the debug feature -------------------------- @@ -491,7 +504,7 @@ Defining views -------------- Views are specified with the 'debug_view' structure. 
There are defined -callback functions which are used for reading and writing the proc files: +callback functions which are used for reading and writing the debugfs files: struct debug_view { char name[DEBUG_MAX_PROCF_LEN]; @@ -525,7 +538,7 @@ typedef int (debug_input_proc_t) (debug_info_t* id, The "private_data" member can be used as pointer to view specific data. It is not used by the debug feature itself. -The output when reading a debug-proc file is structured like this: +The output when reading a debugfs file is structured like this: "prolog_proc output" @@ -534,13 +547,13 @@ The output when reading a debug-proc file is structured like this: "header_proc output 3" "format_proc output 3" ... -When a view is read from the proc fs, the Debug Feature calls the +When a view is read from the debugfs, the Debug Feature calls the 'prolog_proc' once for writing the prolog. Then 'header_proc' and 'format_proc' are called for each existing debug entry. The input_proc can be used to implement functionality when it is written to -the view (e.g. like with 'echo "0" > /proc/s390dbf/dasd/level). +the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level). For header_proc there can be used the default function debug_dflt_header_fn() which is defined in in debug.h. @@ -602,7 +615,7 @@ debug_info = debug_register ("test", 0, 4, 4 )); debug_register_view(debug_info, &debug_test_view); for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i); -> cat /proc/s390dbf/test/myview +> cat /sys/kernel/debug/s390dbf/test/myview 00 00964419734:611402 1 - 00 88042ca This error........... 00 00964419734:611405 1 - 00 88042ca That error........... 00 00964419734:611408 1 - 00 88042ca Problem.............. diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 91f8ce5543d..960ba6029c3 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -19,22 +19,27 @@ #include #include #include - #include #include +#include +#include #include #define DEBUG_PROLOG_ENTRY -1 +#define ALL_AREAS 0 /* copy all debug areas */ +#define NO_AREAS 1 /* copy no debug areas */ + /* typedefs */ typedef struct file_private_info { loff_t offset; /* offset of last read in file */ int act_area; /* number of last formated area */ + int act_page; /* act page in given area */ int act_entry; /* last formated entry (offset */ /* relative to beginning of last */ - /* formated area) */ + /* formated page) */ size_t act_entry_offset; /* up to this offset we copied */ /* in last read the last formated */ /* entry to userland */ @@ -51,8 +56,8 @@ typedef struct * This assumes that all args are converted into longs * on L/390 this is the case for all types of parameter * except of floats, and long long (32 bit) - * - */ + * + */ long args[0]; } debug_sprintf_entry_t; @@ -63,32 +68,38 @@ extern void tod_to_timeval(uint64_t todval, struct timeval *xtime); static int debug_init(void); static ssize_t debug_output(struct file *file, char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t * offset); static ssize_t debug_input(struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t * offset); static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); -static debug_info_t* debug_info_create(char *name, int page_order, int nr_areas, int buf_size); +static debug_info_t* debug_info_create(char *name, int pages_per_area, + int nr_areas, int buf_size); static void 
debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t * id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf); static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); +static int debug_prolog_pages_fn(debug_info_t * id, + struct debug_view *view, char *out_buf); +static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf); + char *out_buf, const char *in_buf); static int debug_raw_format_fn(debug_info_t * id, - struct debug_view *view, char *out_buf, - const char *in_buf); + struct debug_view *view, char *out_buf, + const char *in_buf); static int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf); + int area, debug_entry_t * entry, char *out_buf); static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event); + char *out_buf, debug_sprintf_entry_t *curr_event); /* globals */ @@ -119,6 +130,15 @@ struct debug_view debug_level_view = { NULL }; +struct debug_view debug_pages_view = { + "pages", + &debug_prolog_pages_fn, + NULL, + NULL, + &debug_input_pages_fn, + NULL +}; + struct debug_view debug_flush_view = { "flush", NULL, @@ -149,98 +169,161 @@ DECLARE_MUTEX(debug_lock); static int initialized; static struct file_operations debug_file_ops = { - .owner = THIS_MODULE, + .owner = THIS_MODULE, .read = debug_output, - .write = debug_input, + .write = debug_input, .open = debug_open, .release = debug_close, }; -static struct proc_dir_entry *debug_proc_root_entry; +static struct dentry *debug_debugfs_root_entry; /* functions */ +/* + * debug_areas_alloc + * - Debug areas are implemented as a threedimensonal array: + * areas[areanumber][pagenumber][pageoffset] + */ + +static debug_entry_t*** +debug_areas_alloc(int pages_per_area, int nr_areas) +{ + debug_entry_t*** areas; + int i,j; + + areas = (debug_entry_t ***) kmalloc(nr_areas * + sizeof(debug_entry_t**), + GFP_KERNEL); + if (!areas) + goto fail_malloc_areas; + for (i = 0; i < nr_areas; i++) { + areas[i] = (debug_entry_t**) kmalloc(pages_per_area * + sizeof(debug_entry_t*),GFP_KERNEL); + if (!areas[i]) { + goto fail_malloc_areas2; + } + for(j = 0; j < pages_per_area; j++) { + areas[i][j] = (debug_entry_t*)kmalloc(PAGE_SIZE, + GFP_KERNEL); + if(!areas[i][j]) { + for(j--; j >=0 ; j--) { + kfree(areas[i][j]); + } + kfree(areas[i]); + goto fail_malloc_areas2; + } else { + memset(areas[i][j],0,PAGE_SIZE); + } + } + } + return areas; + +fail_malloc_areas2: + for(i--; i >= 0; i--){ + for(j=0; j < pages_per_area;j++){ + kfree(areas[i][j]); + } + kfree(areas[i]); + } + kfree(areas); +fail_malloc_areas: + return NULL; + +} + + /* * debug_info_alloc * - alloc new debug-info */ -static debug_info_t* debug_info_alloc(char *name, int page_order, - 
int nr_areas, int buf_size) +static debug_info_t* +debug_info_alloc(char *name, int pages_per_area, int nr_areas, int buf_size, + int level, int mode) { debug_info_t* rc; - int i; /* alloc everything */ - rc = (debug_info_t*) kmalloc(sizeof(debug_info_t), GFP_ATOMIC); + rc = (debug_info_t*) kmalloc(sizeof(debug_info_t), GFP_KERNEL); if(!rc) goto fail_malloc_rc; - rc->active_entry = (int*)kmalloc(nr_areas * sizeof(int), GFP_ATOMIC); - if(!rc->active_entry) - goto fail_malloc_active_entry; - memset(rc->active_entry, 0, nr_areas * sizeof(int)); - rc->areas = (debug_entry_t **) kmalloc(nr_areas * - sizeof(debug_entry_t *), - GFP_ATOMIC); - if (!rc->areas) - goto fail_malloc_areas; - for (i = 0; i < nr_areas; i++) { - rc->areas[i] = (debug_entry_t *) __get_free_pages(GFP_ATOMIC, - page_order); - if (!rc->areas[i]) { - for (i--; i >= 0; i--) { - free_pages((unsigned long) rc->areas[i], - page_order); - } - goto fail_malloc_areas2; - } else { - memset(rc->areas[i], 0, PAGE_SIZE << page_order); - } + rc->active_entries = (int*)kmalloc(nr_areas * sizeof(int), GFP_KERNEL); + if(!rc->active_entries) + goto fail_malloc_active_entries; + memset(rc->active_entries, 0, nr_areas * sizeof(int)); + rc->active_pages = (int*)kmalloc(nr_areas * sizeof(int), GFP_KERNEL); + if(!rc->active_pages) + goto fail_malloc_active_pages; + memset(rc->active_pages, 0, nr_areas * sizeof(int)); + if((mode == ALL_AREAS) && (pages_per_area != 0)){ + rc->areas = debug_areas_alloc(pages_per_area, nr_areas); + if(!rc->areas) + goto fail_malloc_areas; + } else { + rc->areas = NULL; } /* initialize members */ spin_lock_init(&rc->lock); - rc->page_order = page_order; - rc->nr_areas = nr_areas; - rc->active_area = 0; - rc->level = DEBUG_DEFAULT_LEVEL; - rc->buf_size = buf_size; - rc->entry_size = sizeof(debug_entry_t) + buf_size; - strlcpy(rc->name, name, sizeof(rc->name)); + rc->pages_per_area = pages_per_area; + rc->nr_areas = nr_areas; + rc->active_area = 0; + rc->level = level; + rc->buf_size = buf_size; + rc->entry_size = sizeof(debug_entry_t) + buf_size; + strlcpy(rc->name, name, sizeof(rc->name)-1); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); -#ifdef CONFIG_PROC_FS - memset(rc->proc_entries, 0 ,DEBUG_MAX_VIEWS * - sizeof(struct proc_dir_entry*)); -#endif /* CONFIG_PROC_FS */ + memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS * + sizeof(struct dentry*)); atomic_set(&(rc->ref_count), 0); return rc; -fail_malloc_areas2: - kfree(rc->areas); fail_malloc_areas: - kfree(rc->active_entry); -fail_malloc_active_entry: + kfree(rc->active_pages); +fail_malloc_active_pages: + kfree(rc->active_entries); +fail_malloc_active_entries: kfree(rc); fail_malloc_rc: return NULL; } /* - * debug_info_free - * - free memory debug-info + * debug_areas_free + * - free all debug areas */ -static void debug_info_free(debug_info_t* db_info){ - int i; +static void +debug_areas_free(debug_info_t* db_info) +{ + int i,j; + + if(!db_info->areas) + return; for (i = 0; i < db_info->nr_areas; i++) { - free_pages((unsigned long) db_info->areas[i], - db_info->page_order); + for(j = 0; j < db_info->pages_per_area; j++) { + kfree(db_info->areas[i][j]); + } + kfree(db_info->areas[i]); } kfree(db_info->areas); - kfree(db_info->active_entry); + db_info->areas = NULL; +} + +/* + * debug_info_free + * - free memory debug-info + */ + +static void +debug_info_free(debug_info_t* db_info){ + debug_areas_free(db_info); + kfree(db_info->active_entries); + kfree(db_info->active_pages); kfree(db_info); } @@ -249,21 +332,22 @@ static void 
debug_info_free(debug_info_t* db_info){ * - create new debug-info */ -static debug_info_t* debug_info_create(char *name, int page_order, - int nr_areas, int buf_size) +static debug_info_t* +debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size) { debug_info_t* rc; - rc = debug_info_alloc(name, page_order, nr_areas, buf_size); + rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, + DEBUG_DEFAULT_LEVEL, ALL_AREAS); if(!rc) goto out; - - /* create proc rood directory */ - rc->proc_root_entry = proc_mkdir(rc->name, debug_proc_root_entry); + /* create root directory */ + rc->debugfs_root_entry = debugfs_create_dir(rc->name, + debug_debugfs_root_entry); /* append new element to linked list */ - if (debug_area_first == NULL) { + if (!debug_area_first) { /* first element in list */ debug_area_first = rc; rc->prev = NULL; @@ -285,17 +369,21 @@ out: * - copy debug-info */ -static debug_info_t* debug_info_copy(debug_info_t* in) +static debug_info_t* +debug_info_copy(debug_info_t* in, int mode) { - int i; + int i,j; debug_info_t* rc; - rc = debug_info_alloc(in->name, in->page_order, - in->nr_areas, in->buf_size); - if(!rc) + + rc = debug_info_alloc(in->name, in->pages_per_area, in->nr_areas, + in->buf_size, in->level, mode); + if(!rc || (mode == NO_AREAS)) goto out; for(i = 0; i < in->nr_areas; i++){ - memcpy(rc->areas[i],in->areas[i], PAGE_SIZE << in->page_order); + for(j = 0; j < in->pages_per_area; j++) { + memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE); + } } out: return rc; @@ -306,7 +394,8 @@ out: * - increments reference count for debug-info */ -static void debug_info_get(debug_info_t * db_info) +static void +debug_info_get(debug_info_t * db_info) { if (db_info) atomic_inc(&db_info->ref_count); @@ -317,29 +406,20 @@ static void debug_info_get(debug_info_t * db_info) * - decreases reference count for debug-info and frees it if necessary */ -static void debug_info_put(debug_info_t *db_info) +static void +debug_info_put(debug_info_t *db_info) { int i; if (!db_info) return; if (atomic_dec_and_test(&db_info->ref_count)) { -#ifdef DEBUG - printk(KERN_INFO "debug: freeing debug area %p (%s)\n", - db_info, db_info->name); -#endif for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (db_info->views[i] == NULL) + if (!db_info->views[i]) continue; -#ifdef CONFIG_PROC_FS - remove_proc_entry(db_info->proc_entries[i]->name, - db_info->proc_root_entry); -#endif + debugfs_remove(db_info->debugfs_entries[i]); } -#ifdef CONFIG_PROC_FS - remove_proc_entry(db_info->proc_root_entry->name, - debug_proc_root_entry); -#endif + debugfs_remove(db_info->debugfs_root_entry); if(db_info == debug_area_first) debug_area_first = db_info->next; if(db_info == debug_area_last) @@ -355,9 +435,9 @@ static void debug_info_put(debug_info_t *db_info) * - format one debug entry and return size of formated data */ -static int debug_format_entry(file_private_info_t *p_info) +static int +debug_format_entry(file_private_info_t *p_info) { - debug_info_t *id_org = p_info->debug_info_org; debug_info_t *id_snap = p_info->debug_info_snap; struct debug_view *view = p_info->view; debug_entry_t *act_entry; @@ -365,22 +445,23 @@ static int debug_format_entry(file_private_info_t *p_info) if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ /* print prolog */ if (view->prolog_proc) - len += view->prolog_proc(id_org, view,p_info->temp_buf); + len += view->prolog_proc(id_snap,view,p_info->temp_buf); goto out; } - - act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area] + - p_info->act_entry); + if 
(!id_snap->areas) /* this is true, if we have a prolog only view */ + goto out; /* or if 'pages_per_area' is 0 */ + act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area] + [p_info->act_page] + p_info->act_entry); if (act_entry->id.stck == 0LL) goto out; /* empty entry */ if (view->header_proc) - len += view->header_proc(id_org, view, p_info->act_area, + len += view->header_proc(id_snap, view, p_info->act_area, act_entry, p_info->temp_buf + len); if (view->format_proc) - len += view->format_proc(id_org, view, p_info->temp_buf + len, + len += view->format_proc(id_snap, view, p_info->temp_buf + len, DEBUG_DATA(act_entry)); - out: +out: return len; } @@ -389,20 +470,30 @@ static int debug_format_entry(file_private_info_t *p_info) * - goto next entry in p_info */ -extern inline int debug_next_entry(file_private_info_t *p_info) +extern inline int +debug_next_entry(file_private_info_t *p_info) { - debug_info_t *id = p_info->debug_info_snap; + debug_info_t *id; + + id = p_info->debug_info_snap; if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ p_info->act_entry = 0; + p_info->act_page = 0; goto out; } - if ((p_info->act_entry += id->entry_size) - > ((PAGE_SIZE << (id->page_order)) - - id->entry_size)){ - - /* next area */ + if(!id->areas) + return 1; + p_info->act_entry += id->entry_size; + /* switch to next page, if we reached the end of the page */ + if (p_info->act_entry > (PAGE_SIZE - id->entry_size)){ + /* next page */ p_info->act_entry = 0; - p_info->act_area++; + p_info->act_page += 1; + if((p_info->act_page % id->pages_per_area) == 0) { + /* next area */ + p_info->act_area++; + p_info->act_page=0; + } if(p_info->act_area >= id->nr_areas) return 1; } @@ -416,13 +507,14 @@ out: * - copies formated debug entries to the user buffer */ -static ssize_t debug_output(struct file *file, /* file descriptor */ - char __user *user_buf, /* user buffer */ - size_t len, /* length of buffer */ - loff_t *offset) /* offset in the file */ +static ssize_t +debug_output(struct file *file, /* file descriptor */ + char __user *user_buf, /* user buffer */ + size_t len, /* length of buffer */ + loff_t *offset) /* offset in the file */ { size_t count = 0; - size_t entry_offset, size = 0; + size_t entry_offset; file_private_info_t *p_info; p_info = ((file_private_info_t *) file->private_data); @@ -430,27 +522,33 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ return -EPIPE; if(p_info->act_area >= p_info->debug_info_snap->nr_areas) return 0; - entry_offset = p_info->act_entry_offset; - while(count < len){ - size = debug_format_entry(p_info); - size = min((len - count), (size - entry_offset)); - - if(size){ - if (copy_to_user(user_buf + count, - p_info->temp_buf + entry_offset, size)) - return -EFAULT; + int formatted_line_size; + int formatted_line_residue; + int user_buf_residue; + size_t copy_size; + + formatted_line_size = debug_format_entry(p_info); + formatted_line_residue = formatted_line_size - entry_offset; + user_buf_residue = len-count; + copy_size = min(user_buf_residue, formatted_line_residue); + if(copy_size){ + if (copy_to_user(user_buf + count, p_info->temp_buf + + entry_offset, copy_size)) + return -EFAULT; + count += copy_size; + entry_offset += copy_size; } - count += size; - entry_offset = 0; - if(count != len) - if(debug_next_entry(p_info)) + if(copy_size == formatted_line_residue){ + entry_offset = 0; + if(debug_next_entry(p_info)) goto out; + } } out: p_info->offset = *offset + count; - p_info->act_entry_offset = size; + p_info->act_entry_offset = 
entry_offset; *offset = p_info->offset; return count; } @@ -461,9 +559,9 @@ out: * - calls input function of view */ -static ssize_t debug_input(struct file *file, - const char __user *user_buf, size_t length, - loff_t *offset) +static ssize_t +debug_input(struct file *file, const char __user *user_buf, size_t length, + loff_t *offset) { int rc = 0; file_private_info_t *p_info; @@ -487,26 +585,23 @@ static ssize_t debug_input(struct file *file, * handle */ -static int debug_open(struct inode *inode, struct file *file) +static int +debug_open(struct inode *inode, struct file *file) { int i = 0, rc = 0; file_private_info_t *p_info; debug_info_t *debug_info, *debug_info_snapshot; -#ifdef DEBUG - printk("debug_open\n"); -#endif down(&debug_lock); /* find debug log and view */ - debug_info = debug_area_first; while(debug_info != NULL){ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (debug_info->views[i] == NULL) + if (!debug_info->views[i]) continue; - else if (debug_info->proc_entries[i] == - PDE(file->f_dentry->d_inode)) { + else if (debug_info->debugfs_entries[i] == + file->f_dentry) { goto found; /* found view ! */ } } @@ -516,41 +611,42 @@ static int debug_open(struct inode *inode, struct file *file) rc = -EINVAL; goto out; - found: +found: - /* make snapshot of current debug areas to get it consistent */ + /* Make snapshot of current debug areas to get it consistent. */ + /* To copy all the areas is only needed, if we have a view which */ + /* formats the debug areas. */ - debug_info_snapshot = debug_info_copy(debug_info); + if(!debug_info->views[i]->format_proc && + !debug_info->views[i]->header_proc){ + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + } else { + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + } if(!debug_info_snapshot){ -#ifdef DEBUG - printk(KERN_ERR "debug_open: debug_info_copy failed (out of mem)\n"); -#endif rc = -ENOMEM; goto out; } - - if ((file->private_data = - kmalloc(sizeof(file_private_info_t), GFP_ATOMIC)) == 0) { -#ifdef DEBUG - printk(KERN_ERR "debug_open: kmalloc failed\n"); -#endif - debug_info_free(debug_info_snapshot); + p_info = (file_private_info_t *) kmalloc(sizeof(file_private_info_t), + GFP_KERNEL); + if(!p_info){ + if(debug_info_snapshot) + debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info = (file_private_info_t *) file->private_data; p_info->offset = 0; p_info->debug_info_snap = debug_info_snapshot; p_info->debug_info_org = debug_info; p_info->view = debug_info->views[i]; p_info->act_area = 0; + p_info->act_page = 0; p_info->act_entry = DEBUG_PROLOG_ENTRY; p_info->act_entry_offset = 0; - + file->private_data = p_info; debug_info_get(debug_info); - - out: +out: up(&debug_lock); return rc; } @@ -561,14 +657,13 @@ static int debug_open(struct inode *inode, struct file *file) * - deletes private_data area of the file handle */ -static int debug_close(struct inode *inode, struct file *file) +static int +debug_close(struct inode *inode, struct file *file) { file_private_info_t *p_info; -#ifdef DEBUG - printk("debug_close\n"); -#endif p_info = (file_private_info_t *) file->private_data; - debug_info_free(p_info->debug_info_snap); + if(p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); debug_info_put(p_info->debug_info_org); kfree(file->private_data); return 0; /* success */ @@ -580,8 +675,8 @@ static int debug_close(struct inode *inode, struct file *file) * - returns handle for debug area */ -debug_info_t *debug_register - (char *name, int page_order, int nr_areas, int buf_size) 
+debug_info_t* +debug_register (char *name, int pages_per_area, int nr_areas, int buf_size) { debug_info_t *rc = NULL; @@ -591,18 +686,14 @@ debug_info_t *debug_register /* create new debug_info */ - rc = debug_info_create(name, page_order, nr_areas, buf_size); + rc = debug_info_create(name, pages_per_area, nr_areas, buf_size); if(!rc) goto out; debug_register_view(rc, &debug_level_view); debug_register_view(rc, &debug_flush_view); -#ifdef DEBUG - printk(KERN_INFO - "debug: reserved %d areas of %d pages for debugging %s\n", - nr_areas, 1 << page_order, rc->name); -#endif - out: - if (rc == NULL){ + debug_register_view(rc, &debug_pages_view); +out: + if (!rc){ printk(KERN_ERR "debug: debug_register failed for %s\n",name); } up(&debug_lock); @@ -614,27 +705,65 @@ debug_info_t *debug_register * - give back debug area */ -void debug_unregister(debug_info_t * id) +void +debug_unregister(debug_info_t * id) { if (!id) goto out; down(&debug_lock); -#ifdef DEBUG - printk(KERN_INFO "debug: unregistering %s\n", id->name); -#endif debug_info_put(id); up(&debug_lock); - out: +out: return; } +/* + * debug_set_size: + * - set area size (number of pages) and number of areas + */ +static int +debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) +{ + unsigned long flags; + debug_entry_t *** new_areas; + int rc=0; + + if(!id || (nr_areas <= 0) || (pages_per_area < 0)) + return -EINVAL; + if(pages_per_area > 0){ + new_areas = debug_areas_alloc(pages_per_area, nr_areas); + if(!new_areas) { + printk(KERN_WARNING "debug: could not allocate memory "\ + "for pagenumber: %i\n",pages_per_area); + rc = -ENOMEM; + goto out; + } + } else { + new_areas = NULL; + } + spin_lock_irqsave(&id->lock,flags); + debug_areas_free(id); + id->areas = new_areas; + id->nr_areas = nr_areas; + id->pages_per_area = pages_per_area; + id->active_area = 0; + memset(id->active_entries,0,sizeof(int)*id->nr_areas); + memset(id->active_pages, 0, sizeof(int)*id->nr_areas); + spin_unlock_irqrestore(&id->lock,flags); + printk(KERN_INFO "debug: %s: set new size (%i pages)\n"\ + ,id->name, pages_per_area); +out: + return rc; +} + /* * debug_set_level: * - set actual debug level */ -void debug_set_level(debug_info_t* id, int new_level) +void +debug_set_level(debug_info_t* id, int new_level) { unsigned long flags; if(!id) @@ -649,10 +778,6 @@ void debug_set_level(debug_info_t* id, int new_level) id->name, new_level, 0, DEBUG_MAX_LEVEL); } else { id->level = new_level; -#ifdef DEBUG - printk(KERN_INFO - "debug: %s: new level %i\n",id->name,id->level); -#endif } spin_unlock_irqrestore(&id->lock,flags); } @@ -663,11 +788,16 @@ void debug_set_level(debug_info_t* id, int new_level) * - set active entry to next in the ring buffer */ -extern inline void proceed_active_entry(debug_info_t * id) +extern inline void +proceed_active_entry(debug_info_t * id) { - if ((id->active_entry[id->active_area] += id->entry_size) - > ((PAGE_SIZE << (id->page_order)) - id->entry_size)) - id->active_entry[id->active_area] = 0; + if ((id->active_entries[id->active_area] += id->entry_size) + > (PAGE_SIZE - id->entry_size)){ + id->active_entries[id->active_area] = 0; + id->active_pages[id->active_area] = + (id->active_pages[id->active_area] + 1) % + id->pages_per_area; + } } /* @@ -675,7 +805,8 @@ extern inline void proceed_active_entry(debug_info_t * id) * - set active area to next in the ring buffer */ -extern inline void proceed_active_area(debug_info_t * id) +extern inline void +proceed_active_area(debug_info_t * id) { id->active_area++; id->active_area = 
id->active_area % id->nr_areas; @@ -685,10 +816,12 @@ extern inline void proceed_active_area(debug_info_t * id) * get_active_entry: */ -extern inline debug_entry_t *get_active_entry(debug_info_t * id) +extern inline debug_entry_t* +get_active_entry(debug_info_t * id) { - return (debug_entry_t *) ((char *) id->areas[id->active_area] + - id->active_entry[id->active_area]); + return (debug_entry_t *) (((char *) id->areas[id->active_area] + [id->active_pages[id->active_area]]) + + id->active_entries[id->active_area]); } /* @@ -696,8 +829,9 @@ extern inline debug_entry_t *get_active_entry(debug_info_t * id) * - set timestamp, caller address, cpu number etc. */ -extern inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, - int level, int exception) +extern inline void +debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, + int exception) { STCK(active->id.stck); active->id.fields.cpuid = smp_processor_id(); @@ -721,7 +855,8 @@ static int debug_active=1; * always allow read, allow write only if debug_stoppable is set or * if debug_active is already off */ -static int s390dbf_procactive(ctl_table *table, int write, struct file *filp, +static int +s390dbf_procactive(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) @@ -766,7 +901,8 @@ static struct ctl_table s390dbf_dir_table[] = { struct ctl_table_header *s390dbf_sysctl_header; -void debug_stop_all(void) +void +debug_stop_all(void) { if (debug_stoppable) debug_active = 0; @@ -778,13 +914,13 @@ void debug_stop_all(void) * - write debug entry with given size */ -debug_entry_t *debug_event_common(debug_info_t * id, int level, const void *buf, - int len) +debug_entry_t* +debug_event_common(debug_info_t * id, int level, const void *buf, int len) { unsigned long flags; debug_entry_t *active; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); @@ -801,13 +937,13 @@ debug_entry_t *debug_event_common(debug_info_t * id, int level, const void *buf, * - write debug entry with given size and switch to next debug area */ -debug_entry_t *debug_exception_common(debug_info_t * id, int level, - const void *buf, int len) +debug_entry_t +*debug_exception_common(debug_info_t * id, int level, const void *buf, int len) { unsigned long flags; debug_entry_t *active; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); @@ -823,7 +959,8 @@ debug_entry_t *debug_exception_common(debug_info_t * id, int level, * counts arguments in format string for sprintf view */ -extern inline int debug_count_numargs(char *string) +extern inline int +debug_count_numargs(char *string) { int numargs=0; @@ -838,8 +975,8 @@ extern inline int debug_count_numargs(char *string) * debug_sprintf_event: */ -debug_entry_t *debug_sprintf_event(debug_info_t* id, - int level,char *string,...) +debug_entry_t* +debug_sprintf_event(debug_info_t* id, int level,char *string,...) 
{ va_list ap; int numargs,idx; @@ -849,7 +986,7 @@ debug_entry_t *debug_sprintf_event(debug_info_t* id, if((!id) || (level > id->level)) return NULL; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; numargs=debug_count_numargs(string); @@ -871,8 +1008,8 @@ debug_entry_t *debug_sprintf_event(debug_info_t* id, * debug_sprintf_exception: */ -debug_entry_t *debug_sprintf_exception(debug_info_t* id, - int level,char *string,...) +debug_entry_t* +debug_sprintf_exception(debug_info_t* id, int level,char *string,...) { va_list ap; int numargs,idx; @@ -882,7 +1019,7 @@ debug_entry_t *debug_sprintf_exception(debug_info_t* id, if((!id) || (level > id->level)) return NULL; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; numargs=debug_count_numargs(string); @@ -906,15 +1043,14 @@ debug_entry_t *debug_sprintf_exception(debug_info_t* id, * - is called exactly once to initialize the debug feature */ -static int __init debug_init(void) +static int +__init debug_init(void) { int rc = 0; s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table, 1); down(&debug_lock); -#ifdef CONFIG_PROC_FS - debug_proc_root_entry = proc_mkdir(DEBUG_DIR_ROOT, NULL); -#endif /* CONFIG_PROC_FS */ + debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL); printk(KERN_INFO "debug: Initialization complete\n"); initialized = 1; up(&debug_lock); @@ -926,13 +1062,14 @@ static int __init debug_init(void) * debug_register_view: */ -int debug_register_view(debug_info_t * id, struct debug_view *view) +int +debug_register_view(debug_info_t * id, struct debug_view *view) { int rc = 0; int i; unsigned long flags; mode_t mode = S_IFREG; - struct proc_dir_entry *pde; + struct dentry *pde; if (!id) goto out; @@ -940,16 +1077,17 @@ int debug_register_view(debug_info_t * id, struct debug_view *view) mode |= S_IRUSR; if (view->input_proc) mode |= S_IWUSR; - pde = create_proc_entry(view->name, mode, id->proc_root_entry); + pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, + NULL, &debug_file_ops); if (!pde){ - printk(KERN_WARNING "debug: create_proc_entry() failed! 
Cannot register view %s/%s\n", id->name,view->name); + printk(KERN_WARNING "debug: debugfs_create_file() failed!"\ + " Cannot register view %s/%s\n", id->name,view->name); rc = -1; goto out; } - spin_lock_irqsave(&id->lock, flags); for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (id->views[i] == NULL) + if (!id->views[i]) break; } if (i == DEBUG_MAX_VIEWS) { @@ -957,16 +1095,14 @@ int debug_register_view(debug_info_t * id, struct debug_view *view) id->name,view->name); printk(KERN_WARNING "debug: maximum number of views reached (%i)!\n", i); - remove_proc_entry(pde->name, id->proc_root_entry); + debugfs_remove(pde); rc = -1; - } - else { + } else { id->views[i] = view; - pde->proc_fops = &debug_file_ops; - id->proc_entries[i] = pde; + id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); - out: +out: return rc; } @@ -974,7 +1110,8 @@ int debug_register_view(debug_info_t * id, struct debug_view *view) * debug_unregister_view: */ -int debug_unregister_view(debug_info_t * id, struct debug_view *view) +int +debug_unregister_view(debug_info_t * id, struct debug_view *view) { int rc = 0; int i; @@ -990,15 +1127,46 @@ int debug_unregister_view(debug_info_t * id, struct debug_view *view) if (i == DEBUG_MAX_VIEWS) rc = -1; else { -#ifdef CONFIG_PROC_FS - remove_proc_entry(id->proc_entries[i]->name, - id->proc_root_entry); -#endif + debugfs_remove(id->debugfs_entries[i]); id->views[i] = NULL; rc = 0; } spin_unlock_irqrestore(&id->lock, flags); - out: +out: + return rc; +} + +static inline char * +debug_get_user_string(const char __user *user_buf, size_t user_len) +{ + char* buffer; + + buffer = kmalloc(user_len + 1, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + if (copy_from_user(buffer, user_buf, user_len) != 0) { + kfree(buffer); + return ERR_PTR(-EFAULT); + } + /* got the string, now strip linefeed. 
*/ + if (buffer[user_len - 1] == '\n') + buffer[user_len - 1] = 0; + else + buffer[user_len] = 0; + return buffer; +} + +static inline int +debug_get_uint(char *buf) +{ + int rc; + + for(; isspace(*buf); buf++); + rc = simple_strtoul(buf, &buf, 10); + if(*buf){ + printk("debug: no integer specified!\n"); + rc = -EINVAL; + } return rc; } @@ -1011,13 +1179,69 @@ int debug_unregister_view(debug_info_t * id, struct debug_view *view) * prints out actual debug level */ -static int debug_prolog_level_fn(debug_info_t * id, +static int +debug_prolog_pages_fn(debug_info_t * id, struct debug_view *view, char *out_buf) +{ + return sprintf(out_buf, "%i\n", id->pages_per_area); +} + +/* + * reads new size (number of pages per debug area) + */ + +static int +debug_input_pages_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) +{ + char *str; + int rc,new_pages; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; + goto out; + } + str = debug_get_user_string(user_buf,user_len); + if(IS_ERR(str)){ + rc = PTR_ERR(str); + goto out; + } + new_pages = debug_get_uint(str); + if(new_pages < 0){ + rc = -EINVAL; + goto free_str; + } + rc = debug_set_size(id,id->nr_areas, new_pages); + if(rc != 0){ + rc = -EINVAL; + goto free_str; + } + rc = user_len; +free_str: + kfree(str); +out: + *offset += user_len; + return rc; /* number of input characters */ +} + +/* + * prints out actual debug level + */ + +static int +debug_prolog_level_fn(debug_info_t * id, struct debug_view *view, char *out_buf) { int rc = 0; - if(id->level == -1) rc = sprintf(out_buf,"-\n"); - else rc = sprintf(out_buf, "%i\n", id->level); + if(id->level == DEBUG_OFF_LEVEL) { + rc = sprintf(out_buf,"-\n"); + } + else { + rc = sprintf(out_buf, "%i\n", id->level); + } return rc; } @@ -1025,30 +1249,43 @@ static int debug_prolog_level_fn(debug_info_t * id, * reads new debug level */ -static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t in_buf_size, loff_t * offset) +static int +debug_input_level_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) { - char input_buf[1]; - int rc = in_buf_size; + char *str; + int rc,new_level; - if (*offset != 0) + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; goto out; - if (copy_from_user(input_buf, user_buf, 1)){ - rc = -EFAULT; + } + str = debug_get_user_string(user_buf,user_len); + if(IS_ERR(str)){ + rc = PTR_ERR(str); goto out; } - if (isdigit(input_buf[0])) { - int new_level = ((int) input_buf[0] - (int) '0'); - debug_set_level(id, new_level); - } else if(input_buf[0] == '-') { + if(str[0] == '-'){ debug_set_level(id, DEBUG_OFF_LEVEL); + rc = user_len; + goto free_str; } else { - printk(KERN_INFO "debug: level `%c` is not valid\n", - input_buf[0]); + new_level = debug_get_uint(str); } - out: - *offset += in_buf_size; + if(new_level < 0) { + printk(KERN_INFO "debug: level `%s` is not valid\n", str); + rc = -EINVAL; + } else { + debug_set_level(id, new_level); + rc = user_len; + } +free_str: + kfree(str); +out: + *offset += user_len; return rc; /* number of input characters */ } @@ -1057,29 +1294,36 @@ static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, * flushes debug areas */ -void debug_flush(debug_info_t* id, int area) +void +debug_flush(debug_info_t* id, int area) { unsigned long 
flags; - int i; + int i,j; - if(!id) + if(!id || !id->areas) return; spin_lock_irqsave(&id->lock,flags); if(area == DEBUG_FLUSH_ALL){ id->active_area = 0; - memset(id->active_entry, 0, id->nr_areas * sizeof(int)); - for (i = 0; i < id->nr_areas; i++) - memset(id->areas[i], 0, PAGE_SIZE << id->page_order); + memset(id->active_entries, 0, id->nr_areas * sizeof(int)); + for (i = 0; i < id->nr_areas; i++) { + id->active_pages[i] = 0; + for(j = 0; j < id->pages_per_area; j++) { + memset(id->areas[i][j], 0, PAGE_SIZE); + } + } printk(KERN_INFO "debug: %s: all areas flushed\n",id->name); } else if(area >= 0 && area < id->nr_areas) { - id->active_entry[area] = 0; - memset(id->areas[area], 0, PAGE_SIZE << id->page_order); - printk(KERN_INFO - "debug: %s: area %i has been flushed\n", + id->active_entries[area] = 0; + id->active_pages[area] = 0; + for(i = 0; i < id->pages_per_area; i++) { + memset(id->areas[area][i],0,PAGE_SIZE); + } + printk(KERN_INFO "debug: %s: area %i has been flushed\n", id->name, area); } else { printk(KERN_INFO - "debug: %s: area %i cannot be flushed (range: %i - %i)\n", + "debug: %s: area %i cannot be flushed (range: %i - %i)\n", id->name, area, 0, id->nr_areas-1); } spin_unlock_irqrestore(&id->lock,flags); @@ -1089,15 +1333,20 @@ void debug_flush(debug_info_t* id, int area) * view function: flushes debug areas */ -static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t in_buf_size, loff_t * offset) +static int +debug_input_flush_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) { char input_buf[1]; - int rc = in_buf_size; - - if (*offset != 0) + int rc = user_len; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; goto out; + } if (copy_from_user(input_buf, user_buf, 1)){ rc = -EFAULT; goto out; @@ -1114,8 +1363,8 @@ static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, printk(KERN_INFO "debug: area `%c` is not valid\n", input_buf[0]); - out: - *offset += in_buf_size; +out: + *offset += user_len; return rc; /* number of input characters */ } @@ -1123,8 +1372,9 @@ static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, * prints debug header in raw format */ -int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf) +static int +debug_raw_header_fn(debug_info_t * id, struct debug_view *view, + int area, debug_entry_t * entry, char *out_buf) { int rc; @@ -1137,7 +1387,8 @@ int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, * prints debug data in raw format */ -static int debug_raw_format_fn(debug_info_t * id, struct debug_view *view, +static int +debug_raw_format_fn(debug_info_t * id, struct debug_view *view, char *out_buf, const char *in_buf) { int rc; @@ -1151,8 +1402,9 @@ static int debug_raw_format_fn(debug_info_t * id, struct debug_view *view, * prints debug data in hex/ascii format */ -static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf) +static int +debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf) { int i, rc = 0; @@ -1176,7 +1428,8 @@ static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, * prints header for debug entry */ -int debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, +int 
+debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, int area, debug_entry_t * entry, char *out_buf) { struct timeval time_val; @@ -1210,8 +1463,9 @@ int debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, #define DEBUG_SPRINTF_MAX_ARGS 10 -int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event) +static int +debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, debug_sprintf_entry_t *curr_event) { int num_longs, num_used_args = 0,i, rc = 0; int index[DEBUG_SPRINTF_MAX_ARGS]; @@ -1251,14 +1505,10 @@ out: /* * clean up module */ -void __exit debug_exit(void) +void +__exit debug_exit(void) { -#ifdef DEBUG - printk("debug_cleanup_module: \n"); -#endif -#ifdef CONFIG_PROC_FS - remove_proc_entry(debug_proc_root_entry->name, NULL); -#endif /* CONFIG_PROC_FS */ + debugfs_remove(debug_debugfs_root_entry); unregister_sysctl_table(s390dbf_sysctl_header); return; } @@ -1266,7 +1516,7 @@ void __exit debug_exit(void) /* * module definitions */ -core_initcall(debug_init); +postcore_initcall(debug_init); module_exit(debug_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 3e39508bd92..6527ff6f470 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -176,7 +176,7 @@ dasd_state_known_to_basic(struct dasd_device * device) return rc; /* register 'device' debug area, used for all DBF_DEV_XXX calls */ - device->debug_area = debug_register(device->cdev->dev.bus_id, 0, 2, + device->debug_area = debug_register(device->cdev->dev.bus_id, 1, 2, 8 * sizeof (long)); debug_register_view(device->debug_area, &debug_sprintf_view); debug_set_level(device->debug_area, DBF_EMERG); @@ -1981,7 +1981,7 @@ dasd_init(void) init_waitqueue_head(&dasd_init_waitq); /* register 'common' DASD debug area, used for all DBF_XXX calls */ - dasd_debug_area = debug_register("dasd", 0, 2, 8 * sizeof (long)); + dasd_debug_area = debug_register("dasd", 1, 2, 8 * sizeof (long)); if (dasd_debug_area == NULL) { rc = -ENOMEM; goto failed; diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index d7f19745911..43c34f8c5e6 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -9,13 +9,14 @@ * * /proc interface for the dasd driver. * - * $Revision: 1.31 $ + * $Revision: 1.32 $ */ #include #include #include #include +#include #include #include diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 480ec87976f..20be88e91fa 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1351,13 +1351,13 @@ tape_34xx_init (void) { int rc; - TAPE_DBF_AREA = debug_register ( "tape_34xx", 1, 2, 4*sizeof(long)); + TAPE_DBF_AREA = debug_register ( "tape_34xx", 2, 2, 4*sizeof(long)); debug_register_view(TAPE_DBF_AREA, &debug_sprintf_view); #ifdef DBF_LIKE_HELL debug_set_level(TAPE_DBF_AREA, 6); #endif - DBF_EVENT(3, "34xx init: $Revision: 1.21 $\n"); + DBF_EVENT(3, "34xx init: $Revision: 1.23 $\n"); /* Register driver for 3480/3490 tapes. 
*/ rc = ccw_driver_register(&tape_34xx_driver); if (rc) @@ -1378,7 +1378,7 @@ tape_34xx_exit(void) MODULE_DEVICE_TABLE(ccw, tape_34xx_ids); MODULE_AUTHOR("(C) 2001-2002 IBM Deutschland Entwicklung GmbH"); MODULE_DESCRIPTION("Linux on zSeries channel attached 3480 tape " - "device driver ($Revision: 1.21 $)"); + "device driver ($Revision: 1.23 $)"); MODULE_LICENSE("GPL"); module_init(tape_34xx_init); diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index b4df4a515b1..0597aa0e27e 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -1186,7 +1186,7 @@ tape_mtop(struct tape_device *device, int mt_op, int mt_count) static int tape_init (void) { - TAPE_DBF_AREA = debug_register ( "tape", 1, 2, 4*sizeof(long)); + TAPE_DBF_AREA = debug_register ( "tape", 2, 2, 4*sizeof(long)); debug_register_view(TAPE_DBF_AREA, &debug_sprintf_view); #ifdef DBF_LIKE_HELL debug_set_level(TAPE_DBF_AREA, 6); diff --git a/drivers/s390/char/tape_proc.c b/drivers/s390/char/tape_proc.c index 801d17cca34..5fec0a10cc3 100644 --- a/drivers/s390/char/tape_proc.c +++ b/drivers/s390/char/tape_proc.c @@ -15,6 +15,7 @@ #include #include #include +#include #define TAPE_DBF_AREA tape_core_dbf diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index dfbbf235ca2..7f11a608a63 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -203,7 +203,7 @@ static int __init vmcp_init(void) else printk(KERN_WARNING "z/VM CP interface not loaded. Could not register misc device.\n"); - vmcp_debug = debug_register("vmcp", 0, 1, 240); + vmcp_debug = debug_register("vmcp", 1, 1, 240); debug_register_view(vmcp_debug, &debug_hex_ascii_view); return ret; } diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 1d9b3f18d8d..ea813bdce1d 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/cio.c * S/390 common I/O routines -- low level i/o calls - * $Revision: 1.133 $ + * $Revision: 1.134 $ * * Copyright (C) 1999-2002 IBM Deutschland Entwicklung GmbH, * IBM Corporation @@ -63,17 +63,17 @@ __setup ("cio_msg=", cio_setup); static int __init cio_debug_init (void) { - cio_debug_msg_id = debug_register ("cio_msg", 4, 4, 16*sizeof (long)); + cio_debug_msg_id = debug_register ("cio_msg", 16, 4, 16*sizeof (long)); if (!cio_debug_msg_id) goto out_unregister; debug_register_view (cio_debug_msg_id, &debug_sprintf_view); debug_set_level (cio_debug_msg_id, 2); - cio_debug_trace_id = debug_register ("cio_trace", 4, 4, 8); + cio_debug_trace_id = debug_register ("cio_trace", 16, 4, 8); if (!cio_debug_trace_id) goto out_unregister; debug_register_view (cio_debug_trace_id, &debug_hex_ascii_view); debug_set_level (cio_debug_trace_id, 2); - cio_debug_crw_id = debug_register ("cio_crw", 2, 4, 16*sizeof (long)); + cio_debug_crw_id = debug_register ("cio_crw", 4, 4, 16*sizeof (long)); if (!cio_debug_crw_id) goto out_unregister; debug_register_view (cio_debug_crw_id, &debug_sprintf_view); diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index bbe9f45d143..82194c4eadf 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -56,7 +56,7 @@ #include "ioasm.h" #include "chsc.h" -#define VERSION_QDIO_C "$Revision: 1.98 $" +#define VERSION_QDIO_C "$Revision: 1.101 $" /****************** MODULE PARAMETER VARIABLES ********************/ MODULE_AUTHOR("Utz Bacher "); @@ -3342,7 +3342,7 @@ static int qdio_register_dbf_views(void) { qdio_dbf_setup=debug_register(QDIO_DBF_SETUP_NAME, - QDIO_DBF_SETUP_INDEX, + 
QDIO_DBF_SETUP_PAGES, QDIO_DBF_SETUP_NR_AREAS, QDIO_DBF_SETUP_LEN); if (!qdio_dbf_setup) @@ -3351,7 +3351,7 @@ qdio_register_dbf_views(void) debug_set_level(qdio_dbf_setup,QDIO_DBF_SETUP_LEVEL); qdio_dbf_sbal=debug_register(QDIO_DBF_SBAL_NAME, - QDIO_DBF_SBAL_INDEX, + QDIO_DBF_SBAL_PAGES, QDIO_DBF_SBAL_NR_AREAS, QDIO_DBF_SBAL_LEN); if (!qdio_dbf_sbal) @@ -3361,7 +3361,7 @@ qdio_register_dbf_views(void) debug_set_level(qdio_dbf_sbal,QDIO_DBF_SBAL_LEVEL); qdio_dbf_sense=debug_register(QDIO_DBF_SENSE_NAME, - QDIO_DBF_SENSE_INDEX, + QDIO_DBF_SENSE_PAGES, QDIO_DBF_SENSE_NR_AREAS, QDIO_DBF_SENSE_LEN); if (!qdio_dbf_sense) @@ -3371,7 +3371,7 @@ qdio_register_dbf_views(void) debug_set_level(qdio_dbf_sense,QDIO_DBF_SENSE_LEVEL); qdio_dbf_trace=debug_register(QDIO_DBF_TRACE_NAME, - QDIO_DBF_TRACE_INDEX, + QDIO_DBF_TRACE_PAGES, QDIO_DBF_TRACE_NR_AREAS, QDIO_DBF_TRACE_LEN); if (!qdio_dbf_trace) @@ -3382,7 +3382,7 @@ qdio_register_dbf_views(void) #ifdef CONFIG_QDIO_DEBUG qdio_dbf_slsb_out=debug_register(QDIO_DBF_SLSB_OUT_NAME, - QDIO_DBF_SLSB_OUT_INDEX, + QDIO_DBF_SLSB_OUT_PAGES, QDIO_DBF_SLSB_OUT_NR_AREAS, QDIO_DBF_SLSB_OUT_LEN); if (!qdio_dbf_slsb_out) @@ -3391,7 +3391,7 @@ qdio_register_dbf_views(void) debug_set_level(qdio_dbf_slsb_out,QDIO_DBF_SLSB_OUT_LEVEL); qdio_dbf_slsb_in=debug_register(QDIO_DBF_SLSB_IN_NAME, - QDIO_DBF_SLSB_IN_INDEX, + QDIO_DBF_SLSB_IN_PAGES, QDIO_DBF_SLSB_IN_NR_AREAS, QDIO_DBF_SLSB_IN_LEN); if (!qdio_dbf_slsb_in) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index b6daadac4e8..6b8aa6a852b 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -3,7 +3,7 @@ #include -#define VERSION_CIO_QDIO_H "$Revision: 1.32 $" +#define VERSION_CIO_QDIO_H "$Revision: 1.33 $" #ifdef CONFIG_QDIO_DEBUG #define QDIO_VERBOSE_LEVEL 9 @@ -132,7 +132,7 @@ enum qdio_irq_states { #define QDIO_DBF_SETUP_NAME "qdio_setup" #define QDIO_DBF_SETUP_LEN 8 -#define QDIO_DBF_SETUP_INDEX 2 +#define QDIO_DBF_SETUP_PAGES 4 #define QDIO_DBF_SETUP_NR_AREAS 1 #ifdef CONFIG_QDIO_DEBUG #define QDIO_DBF_SETUP_LEVEL 6 @@ -142,7 +142,7 @@ enum qdio_irq_states { #define QDIO_DBF_SBAL_NAME "qdio_labs" /* sbal */ #define QDIO_DBF_SBAL_LEN 256 -#define QDIO_DBF_SBAL_INDEX 2 +#define QDIO_DBF_SBAL_PAGES 4 #define QDIO_DBF_SBAL_NR_AREAS 2 #ifdef CONFIG_QDIO_DEBUG #define QDIO_DBF_SBAL_LEVEL 6 @@ -154,16 +154,16 @@ enum qdio_irq_states { #define QDIO_DBF_TRACE_LEN 8 #define QDIO_DBF_TRACE_NR_AREAS 2 #ifdef CONFIG_QDIO_DEBUG -#define QDIO_DBF_TRACE_INDEX 4 +#define QDIO_DBF_TRACE_PAGES 16 #define QDIO_DBF_TRACE_LEVEL 4 /* -------- could be even more verbose here */ #else /* CONFIG_QDIO_DEBUG */ -#define QDIO_DBF_TRACE_INDEX 2 +#define QDIO_DBF_TRACE_PAGES 4 #define QDIO_DBF_TRACE_LEVEL 2 #endif /* CONFIG_QDIO_DEBUG */ #define QDIO_DBF_SENSE_NAME "qdio_sense" #define QDIO_DBF_SENSE_LEN 64 -#define QDIO_DBF_SENSE_INDEX 1 +#define QDIO_DBF_SENSE_PAGES 2 #define QDIO_DBF_SENSE_NR_AREAS 1 #ifdef CONFIG_QDIO_DEBUG #define QDIO_DBF_SENSE_LEVEL 6 @@ -176,13 +176,13 @@ enum qdio_irq_states { #define QDIO_DBF_SLSB_OUT_NAME "qdio_slsb_out" #define QDIO_DBF_SLSB_OUT_LEN QDIO_MAX_BUFFERS_PER_Q -#define QDIO_DBF_SLSB_OUT_INDEX 8 +#define QDIO_DBF_SLSB_OUT_PAGES 256 #define QDIO_DBF_SLSB_OUT_NR_AREAS 1 #define QDIO_DBF_SLSB_OUT_LEVEL 6 #define QDIO_DBF_SLSB_IN_NAME "qdio_slsb_in" #define QDIO_DBF_SLSB_IN_LEN QDIO_MAX_BUFFERS_PER_Q -#define QDIO_DBF_SLSB_IN_INDEX 8 +#define QDIO_DBF_SLSB_IN_PAGES 256 #define QDIO_DBF_SLSB_IN_NR_AREAS 1 #define QDIO_DBF_SLSB_IN_LEVEL 6 #endif /* CONFIG_QDIO_DEBUG */ diff --git 
a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index a99927d54eb..60440dbe3a2 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -146,8 +146,8 @@ claw_unregister_debug_facility(void) static int claw_register_debug_facility(void) { - claw_dbf_setup = debug_register("claw_setup", 1, 1, 8); - claw_dbf_trace = debug_register("claw_trace", 1, 2, 8); + claw_dbf_setup = debug_register("claw_setup", 2, 1, 8); + claw_dbf_trace = debug_register("claw_trace", 2, 2, 8); if (claw_dbf_setup == NULL || claw_dbf_trace == NULL) { printk(KERN_WARNING "Not enough memory for debug facility.\n"); claw_unregister_debug_facility(); diff --git a/drivers/s390/net/ctcdbug.c b/drivers/s390/net/ctcdbug.c index 2c86bfa11b2..0e2a8bb9303 100644 --- a/drivers/s390/net/ctcdbug.c +++ b/drivers/s390/net/ctcdbug.c @@ -1,6 +1,6 @@ /* * - * linux/drivers/s390/net/ctcdbug.c ($Revision: 1.4 $) + * linux/drivers/s390/net/ctcdbug.c ($Revision: 1.6 $) * * CTC / ESCON network driver - s390 dbf exploit. * @@ -9,7 +9,7 @@ * Author(s): Original Code written by * Peter Tiedemann (ptiedem@de.ibm.com) * - * $Revision: 1.4 $ $Date: 2004/08/04 10:11:59 $ + * $Revision: 1.6 $ $Date: 2005/05/11 08:10:17 $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -51,15 +51,15 @@ int ctc_register_dbf_views(void) { ctc_dbf_setup = debug_register(CTC_DBF_SETUP_NAME, - CTC_DBF_SETUP_INDEX, + CTC_DBF_SETUP_PAGES, CTC_DBF_SETUP_NR_AREAS, CTC_DBF_SETUP_LEN); ctc_dbf_data = debug_register(CTC_DBF_DATA_NAME, - CTC_DBF_DATA_INDEX, + CTC_DBF_DATA_PAGES, CTC_DBF_DATA_NR_AREAS, CTC_DBF_DATA_LEN); ctc_dbf_trace = debug_register(CTC_DBF_TRACE_NAME, - CTC_DBF_TRACE_INDEX, + CTC_DBF_TRACE_PAGES, CTC_DBF_TRACE_NR_AREAS, CTC_DBF_TRACE_LEN); diff --git a/drivers/s390/net/ctcdbug.h b/drivers/s390/net/ctcdbug.h index 7fe2ebd1792..7d6afa1627c 100644 --- a/drivers/s390/net/ctcdbug.h +++ b/drivers/s390/net/ctcdbug.h @@ -1,6 +1,6 @@ /* * - * linux/drivers/s390/net/ctcdbug.h ($Revision: 1.5 $) + * linux/drivers/s390/net/ctcdbug.h ($Revision: 1.6 $) * * CTC / ESCON network driver - s390 dbf exploit. 
* @@ -9,7 +9,7 @@ * Author(s): Original Code written by * Peter Tiedemann (ptiedem@de.ibm.com) * - * $Revision: 1.5 $ $Date: 2005/02/27 19:46:44 $ + * $Revision: 1.6 $ $Date: 2005/05/11 08:10:17 $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -35,19 +35,19 @@ */ #define CTC_DBF_SETUP_NAME "ctc_setup" #define CTC_DBF_SETUP_LEN 16 -#define CTC_DBF_SETUP_INDEX 3 +#define CTC_DBF_SETUP_PAGES 8 #define CTC_DBF_SETUP_NR_AREAS 1 #define CTC_DBF_SETUP_LEVEL 3 #define CTC_DBF_DATA_NAME "ctc_data" #define CTC_DBF_DATA_LEN 128 -#define CTC_DBF_DATA_INDEX 3 +#define CTC_DBF_DATA_PAGES 8 #define CTC_DBF_DATA_NR_AREAS 1 #define CTC_DBF_DATA_LEVEL 3 #define CTC_DBF_TRACE_NAME "ctc_trace" #define CTC_DBF_TRACE_LEN 16 -#define CTC_DBF_TRACE_INDEX 2 +#define CTC_DBF_TRACE_PAGES 4 #define CTC_DBF_TRACE_NR_AREAS 2 #define CTC_DBF_TRACE_LEVEL 3 diff --git a/drivers/s390/net/iucv.h b/drivers/s390/net/iucv.h index 198330217ef..0c4644d3d2f 100644 --- a/drivers/s390/net/iucv.h +++ b/drivers/s390/net/iucv.h @@ -37,19 +37,19 @@ */ #define IUCV_DBF_SETUP_NAME "iucv_setup" #define IUCV_DBF_SETUP_LEN 32 -#define IUCV_DBF_SETUP_INDEX 1 +#define IUCV_DBF_SETUP_PAGES 2 #define IUCV_DBF_SETUP_NR_AREAS 1 #define IUCV_DBF_SETUP_LEVEL 3 #define IUCV_DBF_DATA_NAME "iucv_data" #define IUCV_DBF_DATA_LEN 128 -#define IUCV_DBF_DATA_INDEX 1 +#define IUCV_DBF_DATA_PAGES 2 #define IUCV_DBF_DATA_NR_AREAS 1 #define IUCV_DBF_DATA_LEVEL 2 #define IUCV_DBF_TRACE_NAME "iucv_trace" #define IUCV_DBF_TRACE_LEN 16 -#define IUCV_DBF_TRACE_INDEX 2 +#define IUCV_DBF_TRACE_PAGES 4 #define IUCV_DBF_TRACE_NR_AREAS 1 #define IUCV_DBF_TRACE_LEVEL 3 diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index ab086242d30..46f34ba93ac 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -11,7 +11,7 @@ * Frank Pavlic (pavlic@de.ibm.com) and * Martin Schwidefsky * - * $Revision: 1.98 $ $Date: 2005/04/18 13:41:29 $ + * $Revision: 1.99 $ $Date: 2005/05/11 08:10:17 $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -59,7 +59,7 @@ /** * initialization string for output */ -#define VERSION_LCS_C "$Revision: 1.98 $" +#define VERSION_LCS_C "$Revision: 1.99 $" static char version[] __initdata = "LCS driver ("VERSION_LCS_C "/" VERSION_LCS_H ")"; static char debug_buffer[255]; @@ -93,8 +93,8 @@ lcs_unregister_debug_facility(void) static int lcs_register_debug_facility(void) { - lcs_dbf_setup = debug_register("lcs_setup", 1, 1, 8); - lcs_dbf_trace = debug_register("lcs_trace", 1, 2, 8); + lcs_dbf_setup = debug_register("lcs_setup", 2, 1, 8); + lcs_dbf_trace = debug_register("lcs_trace", 2, 2, 8); if (lcs_dbf_setup == NULL || lcs_dbf_trace == NULL) { PRINT_ERR("Not enough memory for debug facility.\n"); lcs_unregister_debug_facility(); diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 3fd4fb754b2..69425a7a6e9 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -1,5 +1,5 @@ /* - * $Id: netiucv.c,v 1.63 2004/07/27 13:36:05 mschwide Exp $ + * $Id: netiucv.c,v 1.66 2005/05/11 08:10:17 holzheu Exp $ * * IUCV network driver * @@ -30,7 +30,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* - * RELEASE-TAG: IUCV network driver $Revision: 1.63 $ + * RELEASE-TAG: IUCV network driver $Revision: 1.66 $ * */ @@ -391,15 +391,15 @@ static int iucv_register_dbf_views(void) { iucv_dbf_setup = debug_register(IUCV_DBF_SETUP_NAME, - IUCV_DBF_SETUP_INDEX, + IUCV_DBF_SETUP_PAGES, IUCV_DBF_SETUP_NR_AREAS, IUCV_DBF_SETUP_LEN); iucv_dbf_data = debug_register(IUCV_DBF_DATA_NAME, - IUCV_DBF_DATA_INDEX, + IUCV_DBF_DATA_PAGES, IUCV_DBF_DATA_NR_AREAS, IUCV_DBF_DATA_LEN); iucv_dbf_trace = debug_register(IUCV_DBF_TRACE_NAME, - IUCV_DBF_TRACE_INDEX, + IUCV_DBF_TRACE_PAGES, IUCV_DBF_TRACE_NR_AREAS, IUCV_DBF_TRACE_LEN); @@ -2076,7 +2076,7 @@ DRIVER_ATTR(remove, 0200, NULL, remove_write); static void netiucv_banner(void) { - char vbuf[] = "$Revision: 1.63 $"; + char vbuf[] = "$Revision: 1.66 $"; char *version = vbuf; if ((version = strchr(version, ':'))) { diff --git a/drivers/s390/net/qeth.h b/drivers/s390/net/qeth.h index a755b57db46..008e0a5d2eb 100644 --- a/drivers/s390/net/qeth.h +++ b/drivers/s390/net/qeth.h @@ -42,44 +42,44 @@ */ #define QETH_DBF_SETUP_NAME "qeth_setup" #define QETH_DBF_SETUP_LEN 8 -#define QETH_DBF_SETUP_INDEX 3 +#define QETH_DBF_SETUP_PAGES 8 #define QETH_DBF_SETUP_NR_AREAS 1 #define QETH_DBF_SETUP_LEVEL 5 #define QETH_DBF_MISC_NAME "qeth_misc" #define QETH_DBF_MISC_LEN 128 -#define QETH_DBF_MISC_INDEX 1 +#define QETH_DBF_MISC_PAGES 2 #define QETH_DBF_MISC_NR_AREAS 1 #define QETH_DBF_MISC_LEVEL 2 #define QETH_DBF_DATA_NAME "qeth_data" #define QETH_DBF_DATA_LEN 96 -#define QETH_DBF_DATA_INDEX 3 +#define QETH_DBF_DATA_PAGES 8 #define QETH_DBF_DATA_NR_AREAS 1 #define QETH_DBF_DATA_LEVEL 2 #define QETH_DBF_CONTROL_NAME "qeth_control" #define QETH_DBF_CONTROL_LEN 256 -#define QETH_DBF_CONTROL_INDEX 3 +#define QETH_DBF_CONTROL_PAGES 8 #define QETH_DBF_CONTROL_NR_AREAS 2 #define QETH_DBF_CONTROL_LEVEL 5 #define QETH_DBF_TRACE_NAME "qeth_trace" #define QETH_DBF_TRACE_LEN 8 -#define QETH_DBF_TRACE_INDEX 2 +#define QETH_DBF_TRACE_PAGES 4 #define QETH_DBF_TRACE_NR_AREAS 2 #define QETH_DBF_TRACE_LEVEL 3 extern debug_info_t *qeth_dbf_trace; #define QETH_DBF_SENSE_NAME "qeth_sense" #define QETH_DBF_SENSE_LEN 64 -#define QETH_DBF_SENSE_INDEX 1 +#define QETH_DBF_SENSE_PAGES 2 #define QETH_DBF_SENSE_NR_AREAS 1 #define QETH_DBF_SENSE_LEVEL 2 #define QETH_DBF_QERR_NAME "qeth_qerr" #define QETH_DBF_QERR_LEN 8 -#define QETH_DBF_QERR_INDEX 1 +#define QETH_DBF_QERR_PAGES 2 #define QETH_DBF_QERR_NR_AREAS 2 #define QETH_DBF_QERR_LEVEL 2 diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 208127a5033..3cb88c77003 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -7639,31 +7639,31 @@ static int qeth_register_dbf_views(void) { qeth_dbf_setup = debug_register(QETH_DBF_SETUP_NAME, - QETH_DBF_SETUP_INDEX, + QETH_DBF_SETUP_PAGES, QETH_DBF_SETUP_NR_AREAS, QETH_DBF_SETUP_LEN); qeth_dbf_misc = debug_register(QETH_DBF_MISC_NAME, - QETH_DBF_MISC_INDEX, + QETH_DBF_MISC_PAGES, QETH_DBF_MISC_NR_AREAS, QETH_DBF_MISC_LEN); qeth_dbf_data = debug_register(QETH_DBF_DATA_NAME, - QETH_DBF_DATA_INDEX, + QETH_DBF_DATA_PAGES, QETH_DBF_DATA_NR_AREAS, QETH_DBF_DATA_LEN); qeth_dbf_control = debug_register(QETH_DBF_CONTROL_NAME, - QETH_DBF_CONTROL_INDEX, + QETH_DBF_CONTROL_PAGES, QETH_DBF_CONTROL_NR_AREAS, QETH_DBF_CONTROL_LEN); qeth_dbf_sense = debug_register(QETH_DBF_SENSE_NAME, - QETH_DBF_SENSE_INDEX, + QETH_DBF_SENSE_PAGES, QETH_DBF_SENSE_NR_AREAS, QETH_DBF_SENSE_LEN); qeth_dbf_qerr = debug_register(QETH_DBF_QERR_NAME, - QETH_DBF_QERR_INDEX, + QETH_DBF_QERR_PAGES, 
QETH_DBF_QERR_NR_AREAS, QETH_DBF_QERR_LEN); qeth_dbf_trace = debug_register(QETH_DBF_TRACE_NAME, - QETH_DBF_TRACE_INDEX, + QETH_DBF_TRACE_PAGES, QETH_DBF_TRACE_NR_AREAS, QETH_DBF_TRACE_LEN); diff --git a/include/asm-s390/debug.h b/include/asm-s390/debug.h index 6bbcdea42a8..92360d90144 100644 --- a/include/asm-s390/debug.h +++ b/include/asm-s390/debug.h @@ -9,6 +9,8 @@ #ifndef DEBUG_H #define DEBUG_H +#include +#include #include /* Note: @@ -31,19 +33,18 @@ struct __debug_entry{ } __attribute__((packed)); -#define __DEBUG_FEATURE_VERSION 1 /* version of debug feature */ +#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ #ifdef __KERNEL__ #include #include #include -#include #define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ #define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ #define DEBUG_FLUSH_ALL -1 /* parameter to flush all areas */ #define DEBUG_MAX_VIEWS 10 /* max number of views in proc fs */ -#define DEBUG_MAX_PROCF_LEN 64 /* max length for a proc file name */ +#define DEBUG_MAX_NAME_LEN 64 /* max length for a debugfs file name */ #define DEBUG_DEFAULT_LEVEL 3 /* initial debug level */ #define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in proc fs */ @@ -64,16 +65,17 @@ typedef struct debug_info { spinlock_t lock; int level; int nr_areas; - int page_order; + int pages_per_area; int buf_size; int entry_size; - debug_entry_t** areas; + debug_entry_t*** areas; int active_area; - int *active_entry; - struct proc_dir_entry* proc_root_entry; - struct proc_dir_entry* proc_entries[DEBUG_MAX_VIEWS]; + int *active_pages; + int *active_entries; + struct dentry* debugfs_root_entry; + struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; struct debug_view* views[DEBUG_MAX_VIEWS]; - char name[DEBUG_MAX_PROCF_LEN]; + char name[DEBUG_MAX_NAME_LEN]; } debug_info_t; typedef int (debug_header_proc_t) (debug_info_t* id, @@ -98,7 +100,7 @@ int debug_dflt_header_fn(debug_info_t* id, struct debug_view* view, int area, debug_entry_t* entry, char* out_buf); struct debug_view { - char name[DEBUG_MAX_PROCF_LEN]; + char name[DEBUG_MAX_NAME_LEN]; debug_prolog_proc_t* prolog_proc; debug_header_proc_t* header_proc; debug_format_proc_t* format_proc; @@ -120,7 +122,7 @@ debug_entry_t* debug_exception_common(debug_info_t* id, int level, /* Debug Feature API: */ -debug_info_t* debug_register(char* name, int pages_index, int nr_areas, +debug_info_t* debug_register(char* name, int pages, int nr_areas, int buf_size); void debug_unregister(debug_info_t* id); @@ -132,7 +134,8 @@ void debug_stop_all(void); extern inline debug_entry_t* debug_event(debug_info_t* id, int level, void* data, int length) { - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_event_common(id,level,data,length); } @@ -140,7 +143,8 @@ extern inline debug_entry_t* debug_int_event(debug_info_t* id, int level, unsigned int tag) { unsigned int t=tag; - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_event_common(id,level,&t,sizeof(unsigned int)); } @@ -148,14 +152,16 @@ extern inline debug_entry_t * debug_long_event (debug_info_t* id, int level, unsigned long tag) { unsigned long t=tag; - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_event_common(id,level,&t,sizeof(unsigned long)); } extern inline debug_entry_t* 
debug_text_event(debug_info_t* id, int level, const char* txt) { - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_event_common(id,level,txt,strlen(txt)); } @@ -167,7 +173,8 @@ debug_sprintf_event(debug_info_t* id,int level,char *string,...) extern inline debug_entry_t* debug_exception(debug_info_t* id, int level, void* data, int length) { - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_exception_common(id,level,data,length); } @@ -175,7 +182,8 @@ extern inline debug_entry_t* debug_int_exception(debug_info_t* id, int level, unsigned int tag) { unsigned int t=tag; - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_exception_common(id,level,&t,sizeof(unsigned int)); } @@ -183,14 +191,16 @@ extern inline debug_entry_t * debug_long_exception (debug_info_t* id, int level, unsigned long tag) { unsigned long t=tag; - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_exception_common(id,level,&t,sizeof(unsigned long)); } extern inline debug_entry_t* debug_text_exception(debug_info_t* id, int level, const char* txt) { - if ((!id) || (level > id->level)) return NULL; + if ((!id) || (level > id->level) || (id->pages_per_area == 0)) + return NULL; return debug_exception_common(id,level,txt,strlen(txt)); } -- cgit v1.2.3 From 4d0145a7deab4027a0f0a7de74c2d103b8f029cf Mon Sep 17 00:00:00 2001 From: Lee Nicks Date: Sat, 25 Jun 2005 14:55:36 -0700 Subject: [PATCH] compilation errors in drivers/serial/mpsc.c The following patch fix gcc 4 compilation errors in drivers/serial/mpsc.c Signed-off-by: Lee Nicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/mpsc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c index a8314aee2ab..a2a64331800 100644 --- a/drivers/serial/mpsc.c +++ b/drivers/serial/mpsc.c @@ -67,7 +67,11 @@ static struct mpsc_port_info mpsc_ports[MPSC_NUM_CTLRS]; static struct mpsc_shared_regs mpsc_shared_regs; +static struct uart_driver mpsc_reg; +static void mpsc_start_rx(struct mpsc_port_info *pi); +static void mpsc_free_ring_mem(struct mpsc_port_info *pi); +static void mpsc_release_port(struct uart_port *port); /* ****************************************************************************** * @@ -546,7 +550,6 @@ static int mpsc_alloc_ring_mem(struct mpsc_port_info *pi) { int rc = 0; - static void mpsc_free_ring_mem(struct mpsc_port_info *pi); pr_debug("mpsc_alloc_ring_mem[%d]: Allocating ring mem\n", pi->port.line); @@ -745,7 +748,6 @@ mpsc_rx_intr(struct mpsc_port_info *pi, struct pt_regs *regs) int rc = 0; u8 *bp; char flag = TTY_NORMAL; - static void mpsc_start_rx(struct mpsc_port_info *pi); pr_debug("mpsc_rx_intr[%d]: Handling Rx intr\n", pi->port.line); @@ -1178,7 +1180,6 @@ static void mpsc_shutdown(struct uart_port *port) { struct mpsc_port_info *pi = (struct mpsc_port_info *)port; - static void mpsc_release_port(struct uart_port *port); pr_debug("mpsc_shutdown[%d]: Shutting down MPSC\n", port->line); @@ -1448,7 +1449,6 @@ mpsc_console_setup(struct console *co, char *options) return uart_set_options(&pi->port, co, baud, parity, bits, flow); } -extern struct uart_driver mpsc_reg; static struct console mpsc_console = { .name = 
MPSC_DEV_NAME, .write = mpsc_console_write, -- cgit v1.2.3 From ae67cd643e9e64217fd92457324625c67fec6e35 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 25 Jun 2005 14:55:36 -0700 Subject: [PATCH] Makefile: s/gcc-option/cc-option/ Fixes http://bugme.osdl.org/show_bug.cgi?id=4726 Signed-off-by: Alexey Dobriyan Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fad349724e9..1fdace757e1 100644 --- a/Makefile +++ b/Makefile @@ -281,7 +281,7 @@ export quiet Q KBUILD_VERBOSE # See documentation in Documentation/kbuild/makefiles.txt # cc-option -# Usage: cflags-y += $(call gcc-option, -march=winchip-c6, -march=i586) +# Usage: cflags-y += $(call cc-option, -march=winchip-c6, -march=i586) cc-option = $(shell if $(CC) $(CFLAGS) $(1) -S -o /dev/null -xc /dev/null \ > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;) -- cgit v1.2.3 From b2b18660066997420b716c1881a6be8b82700d97 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Jun 2005 14:55:38 -0700 Subject: [PATCH] RCU: clean up a few remaining synchronize_kernel() calls 2.6.12-rc6-mm1 has a few remaining synchronize_kernel()s, some (but not all) in comments. This patch changes these synchronize_kernel() calls (and comments) to synchronize_rcu() or synchronize_sched() as follows: - arch/x86_64/kernel/mce.c mce_read(): change to synchronize_sched() to handle races with machine-check exceptions (synchronize_rcu() would not cut it given RCU implementations intended for hardcore realtime use. - drivers/input/serio/i8042.c i8042_stop(): change to synchronize_sched() to handle races with i8042_interrupt() interrupt handler. Again, synchronize_rcu() would not cut it given RCU implementations intended for hardcore realtime use. - include/*/kdebug.h comments: change to synchronize_sched() to handle races with NMIs. As before, synchronize_rcu() would not cut it... - include/linux/list.h comment: change to synchronize_rcu(), since this comment is for list_del_rcu(). - security/keys/key.c unregister_key_type(): change to synchronize_rcu(), since this is interacting with RCU read side. - security/keys/process_keys.c install_session_keyring(): change to synchronize_rcu(), since this is interacting with RCU read side. Signed-off-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 2 +- drivers/input/serio/i8042.c | 2 +- include/asm-i386/kdebug.h | 2 +- include/asm-ppc64/kdebug.h | 2 +- include/asm-sparc64/kdebug.h | 2 +- include/asm-x86_64/kdebug.h | 2 +- include/linux/list.h | 2 +- security/keys/key.c | 2 +- security/keys/process_keys.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 7ab15c8ab95..21e70625a49 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -411,7 +411,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff memset(mcelog.entry, 0, next * sizeof(struct mce)); mcelog.next = 0; - synchronize_kernel(); + synchronize_sched(); /* Collect entries that were still getting written before the synchronize. 
*/ diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 5900de3c3f4..a9bf549c8dc 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -396,7 +396,7 @@ static void i8042_stop(struct serio *serio) struct i8042_port *port = serio->port_data; port->exists = 0; - synchronize_kernel(); + synchronize_sched(); port->serio = NULL; } diff --git a/include/asm-i386/kdebug.h b/include/asm-i386/kdebug.h index de6498b0d49..b3f8d5f59d5 100644 --- a/include/asm-i386/kdebug.h +++ b/include/asm-i386/kdebug.h @@ -18,7 +18,7 @@ struct die_args { }; /* Note - you should never unregister because that can race with NMIs. - If you really want to do it first unregister - then synchronize_kernel - then free. + If you really want to do it first unregister - then synchronize_sched - then free. */ int register_die_notifier(struct notifier_block *nb); extern struct notifier_block *i386die_chain; diff --git a/include/asm-ppc64/kdebug.h b/include/asm-ppc64/kdebug.h index 488634258a7..d383d161cf8 100644 --- a/include/asm-ppc64/kdebug.h +++ b/include/asm-ppc64/kdebug.h @@ -17,7 +17,7 @@ struct die_args { /* Note - you should never unregister because that can race with NMIs. - If you really want to do it first unregister - then synchronize_kernel - + If you really want to do it first unregister - then synchronize_sched - then free. */ int register_die_notifier(struct notifier_block *nb); diff --git a/include/asm-sparc64/kdebug.h b/include/asm-sparc64/kdebug.h index f70d3dad01f..6321f5a0198 100644 --- a/include/asm-sparc64/kdebug.h +++ b/include/asm-sparc64/kdebug.h @@ -16,7 +16,7 @@ struct die_args { }; /* Note - you should never unregister because that can race with NMIs. - * If you really want to do it first unregister - then synchronize_kernel + * If you really want to do it first unregister - then synchronize_sched * - then free. */ int register_die_notifier(struct notifier_block *nb); diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h index 6277f75cbb4..b90341994d8 100644 --- a/include/asm-x86_64/kdebug.h +++ b/include/asm-x86_64/kdebug.h @@ -14,7 +14,7 @@ struct die_args { }; /* Note - you should never unregister because that can race with NMIs. - If you really want to do it first unregister - then synchronize_kernel - then free. + If you really want to do it first unregister - then synchronize_sched - then free. */ int register_die_notifier(struct notifier_block *nb); extern struct notifier_block *die_chain; diff --git a/include/linux/list.h b/include/linux/list.h index 399b51d1721..aab2db21b01 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -185,7 +185,7 @@ static inline void list_del(struct list_head *entry) * list_for_each_entry_rcu(). * * Note that the caller is not permitted to immediately free - * the newly deleted entry. Instead, either synchronize_kernel() + * the newly deleted entry. Instead, either synchronize_rcu() * or call_rcu() must be used to defer freeing until an RCU * grace period has elapsed. 
*/ diff --git a/security/keys/key.c b/security/keys/key.c index 3304d37bb37..fb89f984446 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -980,7 +980,7 @@ void unregister_key_type(struct key_type *ktype) spin_unlock(&key_serial_lock); /* make sure everyone revalidates their keys */ - synchronize_kernel(); + synchronize_rcu(); /* we should now be able to destroy the payloads of all the keys of * this type with impunity */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 34db087bbcc..9b0369c5a22 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -234,7 +234,7 @@ static int install_session_keyring(struct task_struct *tsk, ret = 0; /* we're using RCU on the pointer */ - synchronize_kernel(); + synchronize_rcu(); key_put(old); error: return ret; -- cgit v1.2.3 From daacdfa6e7d6e57c5d1b8e72b1c863feb53d8a82 Mon Sep 17 00:00:00 2001 From: Kylene Jo Hall Date: Sat, 25 Jun 2005 14:55:39 -0700 Subject: [PATCH] tpm: Support new National TPMs This patch is work to support new National TPMs that problems were reported with on Thinkpad T43 and Thinkcentre S51. Thanks to Jens and Gang for their debugging work on these issues. Signed-off-by: Kylene Hall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm.h | 14 +++---- drivers/char/tpm/tpm_atmel.c | 16 ++++---- drivers/char/tpm/tpm_nsc.c | 93 ++++++++++++++++++-------------------------- 3 files changed, 53 insertions(+), 70 deletions(-) diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 10cb450191a..373b41f6b46 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -31,8 +31,8 @@ enum tpm_timeout { /* TPM addresses */ enum tpm_addr { + TPM_SUPERIO_ADDR = 0x2E, TPM_ADDR = 0x4E, - TPM_DATA = 0x4F }; extern ssize_t tpm_show_pubek(struct device *, struct device_attribute *attr, @@ -79,16 +79,16 @@ struct tpm_chip { struct list_head list; }; -static inline int tpm_read_index(int index) +static inline int tpm_read_index(int base, int index) { - outb(index, TPM_ADDR); - return inb(TPM_DATA) & 0xFF; + outb(index, base); + return inb(base+1) & 0xFF; } -static inline void tpm_write_index(int index, int value) +static inline void tpm_write_index(int base, int index, int value) { - outb(index, TPM_ADDR); - outb(value & 0xFF, TPM_DATA); + outb(index, base); + outb(value & 0xFF, base+1); } extern int tpm_register_hardware(struct pci_dev *, diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 61fe14a7712..cc2cc77fd17 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -163,24 +163,24 @@ static int __devinit tpm_atml_init(struct pci_dev *pci_dev, if (pci_enable_device(pci_dev)) return -EIO; - lo = tpm_read_index( TPM_ATMEL_BASE_ADDR_LO ); - hi = tpm_read_index( TPM_ATMEL_BASE_ADDR_HI ); + lo = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_LO); + hi = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_HI); tpm_atmel.base = (hi<<8)|lo; dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base); /* verify that it is an Atmel part */ - if (tpm_read_index(4) != 'A' || tpm_read_index(5) != 'T' - || tpm_read_index(6) != 'M' || tpm_read_index(7) != 'L') { + if (tpm_read_index(TPM_ADDR, 4) != 'A' || tpm_read_index(TPM_ADDR, 5) != 'T' + || tpm_read_index(TPM_ADDR, 6) != 'M' || tpm_read_index(TPM_ADDR, 7) != 'L') { rc = -ENODEV; goto out_err; } /* query chip for its version number */ - if ((version[0] = tpm_read_index(0x00)) != 0xFF) { - version[1] = tpm_read_index(0x01); - version[2] = 
tpm_read_index(0x02); - version[3] = tpm_read_index(0x03); + if ((version[0] = tpm_read_index(TPM_ADDR, 0x00)) != 0xFF) { + version[1] = tpm_read_index(TPM_ADDR, 0x01); + version[2] = tpm_read_index(TPM_ADDR, 0x02); + version[3] = tpm_read_index(TPM_ADDR, 0x03); } else { dev_info(&pci_dev->dev, "version query failed\n"); rc = -ENODEV; diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 1a45e7dfc13..b4127348c06 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -23,7 +23,6 @@ /* National definitions */ enum tpm_nsc_addr{ - TPM_NSC_BASE = 0x360, TPM_NSC_IRQ = 0x07, TPM_NSC_BASE0_HI = 0x60, TPM_NSC_BASE0_LO = 0x61, @@ -56,6 +55,7 @@ enum tpm_nsc_status { NSC_STATUS_RDY = 0x10, /* ready to receive command */ NSC_STATUS_IBR = 0x20 /* ready to receive data */ }; + /* command bits */ enum tpm_nsc_cmd_mode { NSC_COMMAND_NORMAL = 0x01, /* normal mode */ @@ -150,7 +150,8 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) *p = inb(chip->vendor->base + NSC_DATA); } - if ((data & NSC_STATUS_F0) == 0) { + if ((data & NSC_STATUS_F0) == 0 && + (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0)) { dev_err(&chip->pci_dev->dev, "F0 not set\n"); return -EIO; } @@ -259,85 +260,64 @@ static int __devinit tpm_nsc_init(struct pci_dev *pci_dev, { int rc = 0; int lo, hi; + int nscAddrBase = TPM_ADDR; - hi = tpm_read_index(TPM_NSC_BASE0_HI); - lo = tpm_read_index(TPM_NSC_BASE0_LO); - - tpm_nsc.base = (hi<<8) | lo; if (pci_enable_device(pci_dev)) return -EIO; + /* select PM channel 1 */ + tpm_write_index(nscAddrBase,NSC_LDN_INDEX, 0x12); + /* verify that it is a National part (SID) */ - if (tpm_read_index(NSC_SID_INDEX) != 0xEF) { - rc = -ENODEV; - goto out_err; + if (tpm_read_index(TPM_ADDR, NSC_SID_INDEX) != 0xEF) { + nscAddrBase = (tpm_read_index(TPM_SUPERIO_ADDR, 0x2C)<<8)| + (tpm_read_index(TPM_SUPERIO_ADDR, 0x2B)&0xFE); + if (tpm_read_index(nscAddrBase, NSC_SID_INDEX) != 0xF6) { + rc = -ENODEV; + goto out_err; + } } + hi = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_HI); + lo = tpm_read_index(nscAddrBase, TPM_NSC_BASE0_LO); + tpm_nsc.base = (hi<<8) | lo; + dev_dbg(&pci_dev->dev, "NSC TPM detected\n"); dev_dbg(&pci_dev->dev, "NSC LDN 0x%x, SID 0x%x, SRID 0x%x\n", - tpm_read_index(0x07), tpm_read_index(0x20), - tpm_read_index(0x27)); + tpm_read_index(nscAddrBase,0x07), tpm_read_index(nscAddrBase,0x20), + tpm_read_index(nscAddrBase,0x27)); dev_dbg(&pci_dev->dev, "NSC SIOCF1 0x%x SIOCF5 0x%x SIOCF6 0x%x SIOCF8 0x%x\n", - tpm_read_index(0x21), tpm_read_index(0x25), - tpm_read_index(0x26), tpm_read_index(0x28)); + tpm_read_index(nscAddrBase,0x21), tpm_read_index(nscAddrBase,0x25), + tpm_read_index(nscAddrBase,0x26), tpm_read_index(nscAddrBase,0x28)); dev_dbg(&pci_dev->dev, "NSC IO Base0 0x%x\n", - (tpm_read_index(0x60) << 8) | tpm_read_index(0x61)); + (tpm_read_index(nscAddrBase,0x60) << 8) | tpm_read_index(nscAddrBase,0x61)); dev_dbg(&pci_dev->dev, "NSC IO Base1 0x%x\n", - (tpm_read_index(0x62) << 8) | tpm_read_index(0x63)); + (tpm_read_index(nscAddrBase,0x62) << 8) | tpm_read_index(nscAddrBase,0x63)); dev_dbg(&pci_dev->dev, "NSC Interrupt number and wakeup 0x%x\n", - tpm_read_index(0x70)); + tpm_read_index(nscAddrBase,0x70)); dev_dbg(&pci_dev->dev, "NSC IRQ type select 0x%x\n", - tpm_read_index(0x71)); + tpm_read_index(nscAddrBase,0x71)); dev_dbg(&pci_dev->dev, "NSC DMA channel select0 0x%x, select1 0x%x\n", - tpm_read_index(0x74), tpm_read_index(0x75)); + tpm_read_index(nscAddrBase,0x74), tpm_read_index(nscAddrBase,0x75)); 
dev_dbg(&pci_dev->dev, "NSC Config " "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", - tpm_read_index(0xF0), tpm_read_index(0xF1), - tpm_read_index(0xF2), tpm_read_index(0xF3), - tpm_read_index(0xF4), tpm_read_index(0xF5), - tpm_read_index(0xF6), tpm_read_index(0xF7), - tpm_read_index(0xF8), tpm_read_index(0xF9)); + tpm_read_index(nscAddrBase,0xF0), tpm_read_index(nscAddrBase,0xF1), + tpm_read_index(nscAddrBase,0xF2), tpm_read_index(nscAddrBase,0xF3), + tpm_read_index(nscAddrBase,0xF4), tpm_read_index(nscAddrBase,0xF5), + tpm_read_index(nscAddrBase,0xF6), tpm_read_index(nscAddrBase,0xF7), + tpm_read_index(nscAddrBase,0xF8), tpm_read_index(nscAddrBase,0xF9)); dev_info(&pci_dev->dev, - "NSC PC21100 TPM revision %d\n", - tpm_read_index(0x27) & 0x1F); - - if (tpm_read_index(NSC_LDC_INDEX) == 0) - dev_info(&pci_dev->dev, ": NSC TPM not active\n"); - - /* select PM channel 1 */ - tpm_write_index(NSC_LDN_INDEX, 0x12); - tpm_read_index(NSC_LDN_INDEX); - - /* disable the DPM module */ - tpm_write_index(NSC_LDC_INDEX, 0); - tpm_read_index(NSC_LDC_INDEX); - - /* set the data register base addresses */ - tpm_write_index(NSC_DIO_INDEX, TPM_NSC_BASE >> 8); - tpm_write_index(NSC_DIO_INDEX + 1, TPM_NSC_BASE); - tpm_read_index(NSC_DIO_INDEX); - tpm_read_index(NSC_DIO_INDEX + 1); - - /* set the command register base addresses */ - tpm_write_index(NSC_CIO_INDEX, (TPM_NSC_BASE + 1) >> 8); - tpm_write_index(NSC_CIO_INDEX + 1, (TPM_NSC_BASE + 1)); - tpm_read_index(NSC_DIO_INDEX); - tpm_read_index(NSC_DIO_INDEX + 1); - - /* set the interrupt number to be used for the host interface */ - tpm_write_index(NSC_IRQ_INDEX, TPM_NSC_IRQ); - tpm_write_index(NSC_ITS_INDEX, 0x00); - tpm_read_index(NSC_IRQ_INDEX); + "NSC TPM revision %d\n", + tpm_read_index(nscAddrBase, 0x27) & 0x1F); /* enable the DPM module */ - tpm_write_index(NSC_LDC_INDEX, 0x01); - tpm_read_index(NSC_LDC_INDEX); + tpm_write_index(nscAddrBase, NSC_LDC_INDEX, 0x01); if ((rc = tpm_register_hardware(pci_dev, &tpm_nsc)) < 0) goto out_err; @@ -355,6 +335,9 @@ static struct pci_device_id tpm_pci_tbl[] __devinitdata = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0)}, {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)}, {0,} }; -- cgit v1.2.3 From 1dda8abe6feb906306a627b170654ddd8addcdac Mon Sep 17 00:00:00 2001 From: Kylene Jo Hall Date: Sat, 25 Jun 2005 14:55:40 -0700 Subject: [PATCH] tpm: Fix pubek parsing Fix parsing of the PUBEK for display which was leading to showing the wrong modulus length and modulus. 
Signed-off-by: Kylene Hall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 5c843c9bf81..1156d11f02d 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -233,10 +233,10 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr, data[15], data[16], data[17], data[22], data[23], data[24], data[25], data[26], data[27], data[28], data[29], data[30], data[31], data[32], data[33], - be32_to_cpu(*((__be32 *) (data + 32)))); + be32_to_cpu(*((__be32 *) (data + 34)))); for (i = 0; i < 256; i++) { - str += sprintf(str, "%02X ", data[i + 39]); + str += sprintf(str, "%02X ", data[i + 38]); if ((i + 1) % 16 == 0) str += sprintf(str, "\n"); } -- cgit v1.2.3 From 6f9beccb95a47a15e446f64fbb7041dc6edce4d9 Mon Sep 17 00:00:00 2001 From: Kylene Jo Hall Date: Sat, 25 Jun 2005 14:55:41 -0700 Subject: [PATCH] tpm: fix misc name memory problem I was using invalid memory for the miscdevice.name. This patch fixes the problem which was manifested by an ugly entry in /proc/misc. Signed-off-by: Kylene Hall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 1156d11f02d..854475c54f0 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -464,6 +464,7 @@ void __devexit tpm_remove(struct pci_dev *pci_dev) pci_set_drvdata(pci_dev, NULL); misc_deregister(&chip->vendor->miscdev); + kfree(&chip->vendor->miscdev.name); sysfs_remove_group(&pci_dev->dev.kobj, chip->vendor->attr_group); @@ -526,7 +527,9 @@ EXPORT_SYMBOL_GPL(tpm_pm_resume); int tpm_register_hardware(struct pci_dev *pci_dev, struct tpm_vendor_specific *entry) { - char devname[7]; +#define DEVNAME_SIZE 7 + + char *devname; struct tpm_chip *chip; int i, j; @@ -569,7 +572,8 @@ dev_num_search_complete: else chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR; - snprintf(devname, sizeof(devname), "%s%d", "tpm", chip->dev_num); + devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL); + scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num); chip->vendor->miscdev.name = devname; chip->vendor->miscdev.dev = &(pci_dev->dev); -- cgit v1.2.3 From 8ae0b77811d97552b3b3c745e97de18849583bf7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 25 Jun 2005 14:55:41 -0700 Subject: [PATCH] fix fsync(dir) return value for ram-based filesystems Any filesystem which is using simple_dir_operations will return -EINVAL for fsync() on a directory. Make it return zero instead.
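For reference, the change is visible from userspace; below is a minimal sketch of a check, assuming /dev/shm is a ram-based (tmpfs) mount on the test machine:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* open a directory on a ram-based filesystem (path is an assumption) */
		int fd = open("/dev/shm", O_RDONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* without a .fsync hook this failed with EINVAL; with
		   simple_sync_file wired up it is expected to return 0 */
		if (fsync(fd) != 0)
			perror("fsync(dir)");
		else
			printf("fsync(dir) returned 0\n");
		close(fd);
		return 0;
	}
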
Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/libfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/libfs.c b/fs/libfs.c index 5025563e737..58101dff2c6 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -183,6 +183,7 @@ struct file_operations simple_dir_operations = { .llseek = dcache_dir_lseek, .read = generic_read_dir, .readdir = dcache_readdir, + .fsync = simple_sync_file, }; struct inode_operations simple_dir_inode_operations = { -- cgit v1.2.3 From b0cfbd995d091b10841eeb948976f5d1fbf13cdd Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 25 Jun 2005 14:55:42 -0700 Subject: [PATCH] fix for generic_file_write iov problem Here is the fix for the problem described in http://bugzilla.kernel.org/show_bug.cgi?id=4721 Basically, problem is generic_file_buffered_write() is accessing beyond end of the iov[] vector after handling the last vector. If we happen to cross page boundary, we get a fault. I think this simple patch is good enough. If we really don't want to depend on the "count", then we need pass nr_segs to filemap_set_next_iovec() and decrement it and check it. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index b573607b711..c11418dd94e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1954,7 +1954,9 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, if (unlikely(nr_segs > 1)) { filemap_set_next_iovec(&cur_iov, &iov_base, status); - buf = cur_iov->iov_base + iov_base; + if (count) + buf = cur_iov->iov_base + + iov_base; } else { iov_base += status; } -- cgit v1.2.3 From 442ff702233287df3f50ec3a7fd0a73d7367cf5a Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Sat, 25 Jun 2005 14:55:43 -0700 Subject: [PATCH] mconf.c needs locale.h This is failing on my cross-compilation environment (From a solaris system) using gcc-3.4.1 (as the compiler can't find a prototype for the setlocale() function). Signed-off-by: Jean-Christophe Dubois Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/kconfig/mconf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index e5db10ca956..98039bf721f 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -20,6 +20,7 @@ #include #include #include +#include #define LKC_DIRECT_LINK #include "lkc.h" -- cgit v1.2.3 From 6283d58e7464f82b1c1c33943f0bd51c1e83899a Mon Sep 17 00:00:00 2001 From: Qu Fuping Date: Sat, 25 Jun 2005 14:55:44 -0700 Subject: [PATCH] reiserfs: do not ignore i/io error on readpage Reiserfs's readpage does not notice i/o errors. This patch makes reiserfs_readpage to return -EIO when i/o error appears. This patch makes reiserfs to not ignore I/O error on readpage. Signed-off-by: Qu Fuping Signed-off-by: Vladimir V. 
Saveliev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0d5817f8197..289d864fe73 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -254,6 +254,7 @@ static int _get_block_create_0 (struct inode * inode, long block, char * p = NULL; int chars; int ret ; + int result ; int done = 0 ; unsigned long offset ; @@ -262,10 +263,13 @@ static int _get_block_create_0 (struct inode * inode, long block, (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); research: - if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) { + result = search_for_position_by_key (inode->i_sb, &key, &path) ; + if (result != POSITION_FOUND) { pathrelse (&path); if (p) kunmap(bh_result->b_page) ; + if (result == IO_ERROR) + return -EIO; // We do not return -ENOENT if there is a hole but page is uptodate, because it means // That there is some MMAPED data associated with it that is yet to be written to disk. if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { @@ -382,8 +386,9 @@ research: // update key to look for the next piece set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); - if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) - // we read something from tail, even if now we got IO_ERROR + result = search_for_position_by_key (inode->i_sb, &key, &path); + if (result != POSITION_FOUND) + // i/o error most likely break; bh = get_last_bh (&path); ih = get_ih (&path); @@ -394,6 +399,10 @@ research: finished: pathrelse (&path); + + if (result == IO_ERROR) + return -EIO; + /* this buffer has valid data, but isn't valid for io. mapping it to * block #0 tells the rest of reiserfs it just has a tail in it */ -- cgit v1.2.3 From 44f410a7ce593e7e75667b93494223998069f3f1 Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Sat, 25 Jun 2005 14:57:05 -0700 Subject: [PATCH] hpet: do_div fix We don't need to use do_div() on a 32-bit quantity. Signed-off-by: Jon Smirl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 5ec732e6ca9..762fa430fb5 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -834,7 +834,7 @@ int hpet_alloc(struct hpet_data *hdp) printk("\n"); ns = hpetp->hp_period; /* femptoseconds, 10^-15 */ - do_div(ns, 1000000); /* convert to nanoseconds, 10^-9 */ + ns /= 1000000; /* convert to nanoseconds, 10^-9 */ printk(KERN_INFO "hpet%d: %ldns tick, %d %d-bit timers\n", hpetp->hp_which, ns, hpetp->hp_ntimer, cap & HPET_COUNTER_SIZE_MASK ? 64 : 32); -- cgit v1.2.3 From e0f364f4069f76a3613a797c388832822d179076 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:06 -0700 Subject: [PATCH] sched: cleanup wake_idle New sched-domains code means we don't get spans with offline CPUs in them. 
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 76080d142e3..86be13ee500 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -927,14 +927,14 @@ static int wake_idle(int cpu, task_t *p) for_each_domain(cpu, sd) { if (sd->flags & SD_WAKE_IDLE) { - cpus_and(tmp, sd->span, cpu_online_map); - cpus_and(tmp, tmp, p->cpus_allowed); + cpus_and(tmp, sd->span, p->cpus_allowed); for_each_cpu_mask(i, tmp) { if (idle_cpu(i)) return i; } } - else break; + else + break; } return cpu; } -- cgit v1.2.3 From 8102679447da7fcbcb5226ee0207c3a034bc6d5f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:07 -0700 Subject: [PATCH] sched: improve load balancing pinned tasks John Hawkes explained the problem best: A large number of processes that are pinned to a single CPU results in every other CPU's load_balance() seeing this overloaded CPU as "busiest", yet move_tasks() never finds a task to pull-migrate. This condition occurs during module unload, but can also occur as a denial-of-service using sys_sched_setaffinity(). Several hundred CPUs performing this fruitless load_balance() will livelock on the busiest CPU's runqueue lock. A smaller number of CPUs will livelock if the pinned task count gets high. Expanding slightly on John's patch, this one attempts to work out whether the balancing failure has been due to too many tasks pinned on the runqueue. This allows it to be basically invisible to the regular blancing paths (ie. when there are no pinned tasks). We can use this extra knowledge to shut down the balancing faster, and ensure the migration threads don't start running which is another problem observed in the wild. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 62 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 86be13ee500..2794c79b919 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1632,7 +1632,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, */ static inline int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, - struct sched_domain *sd, enum idle_type idle) + struct sched_domain *sd, enum idle_type idle, int *all_pinned) { /* * We do not migrate tasks that are: @@ -1640,10 +1640,12 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. 
*/ - if (task_running(rq, p)) - return 0; if (!cpu_isset(this_cpu, p->cpus_allowed)) return 0; + *all_pinned = 0; + + if (task_running(rq, p)) + return 0; /* * Aggressive migration if: @@ -1656,7 +1658,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, return 1; if (task_hot(p, rq->timestamp_last_tick, sd)) - return 0; + return 0; return 1; } @@ -1669,16 +1671,18 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, */ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, unsigned long max_nr_move, struct sched_domain *sd, - enum idle_type idle) + enum idle_type idle, int *all_pinned) { prio_array_t *array, *dst_array; struct list_head *head, *curr; - int idx, pulled = 0; + int idx, pulled = 0, pinned = 0; task_t *tmp; - if (max_nr_move <= 0 || busiest->nr_running <= 1) + if (max_nr_move == 0) goto out; + pinned = 1; + /* * We first consider expired tasks. Those will likely not be * executed in the near future, and they are most likely to @@ -1717,7 +1721,7 @@ skip_queue: curr = curr->prev; - if (!can_migrate_task(tmp, busiest, this_cpu, sd, idle)) { + if (!can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { if (curr != head) goto skip_queue; idx++; @@ -1746,6 +1750,9 @@ out: * inside pull_task(). */ schedstat_add(sd, lb_gained[idle], pulled); + + if (all_pinned) + *all_pinned = pinned; return pulled; } @@ -1917,7 +1924,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, struct sched_group *group; runqueue_t *busiest; unsigned long imbalance; - int nr_moved; + int nr_moved, all_pinned; + int active_balance = 0; spin_lock(&this_rq->lock); schedstat_inc(sd, lb_cnt[idle]); @@ -1956,9 +1964,15 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, */ double_lock_balance(this_rq, busiest); nr_moved = move_tasks(this_rq, this_cpu, busiest, - imbalance, sd, idle); + imbalance, sd, idle, + &all_pinned); spin_unlock(&busiest->lock); + + /* All tasks on this runqueue were pinned by CPU affinity */ + if (unlikely(all_pinned)) + goto out_balanced; } + spin_unlock(&this_rq->lock); if (!nr_moved) { @@ -1966,16 +1980,15 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, sd->nr_balance_failed++; if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { - int wake = 0; spin_lock(&busiest->lock); if (!busiest->active_balance) { busiest->active_balance = 1; busiest->push_cpu = this_cpu; - wake = 1; + active_balance = 1; } spin_unlock(&busiest->lock); - if (wake) + if (active_balance) wake_up_process(busiest->migration_thread); /* @@ -1984,18 +1997,21 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, */ sd->nr_balance_failed = sd->cache_nice_tries; } - - /* - * We were unbalanced, but unsuccessful in move_tasks(), - * so bump the balance_interval to lessen the lock contention. - */ - if (sd->balance_interval < sd->max_interval) - sd->balance_interval++; - } else { + } else sd->nr_balance_failed = 0; + if (likely(!active_balance)) { /* We were unbalanced, so reset the balancing interval */ sd->balance_interval = sd->min_interval; + } else { + /* + * If we've begun active balancing, start to back off. This + * case may not be covered by the all_pinned logic if there + * is only 1 task on the busy runqueue (because we don't call + * move_tasks). 
+ */ + if (sd->balance_interval < sd->max_interval) + sd->balance_interval *= 2; } return nr_moved; @@ -2047,7 +2063,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); nr_moved = move_tasks(this_rq, this_cpu, busiest, - imbalance, sd, NEWLY_IDLE); + imbalance, sd, NEWLY_IDLE, NULL); if (!nr_moved) schedstat_inc(sd, lb_failed[NEWLY_IDLE]); @@ -2126,7 +2142,7 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) /* move a task from busiest_rq to target_rq */ double_lock_balance(busiest_rq, target_rq); if (move_tasks(target_rq, cpu, busiest_rq, - 1, sd, SCHED_IDLE)) { + 1, sd, SCHED_IDLE, NULL)) { schedstat_inc(sd, alb_pushed); } else { schedstat_inc(sd, alb_failed); -- cgit v1.2.3 From 16cfb1c04c3cbe3759f339d3333e7e1e7d59712a Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:08 -0700 Subject: [PATCH] sched: reduce active load balancing Fix up active load balancing a bit so it doesn't get called when it shouldn't. Reset the nr_balance_failed counter at more points where we have found conditions to be balanced. This reduces too aggressive active balancing seen on some workloads. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 2794c79b919..03d737791c1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2021,6 +2021,7 @@ out_balanced: schedstat_inc(sd, lb_balanced[idle]); + sd->nr_balance_failed = 0; /* tune up the balancing interval */ if (sd->balance_interval < sd->max_interval) sd->balance_interval *= 2; @@ -2046,16 +2047,14 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); if (!group) { - schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); - goto out; + goto out_balanced; } busiest = find_busiest_queue(group); if (!busiest || busiest == this_rq) { - schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); - goto out; + goto out_balanced; } /* Attempt to move tasks */ @@ -2066,11 +2065,16 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, imbalance, sd, NEWLY_IDLE, NULL); if (!nr_moved) schedstat_inc(sd, lb_failed[NEWLY_IDLE]); + else + sd->nr_balance_failed = 0; spin_unlock(&busiest->lock); - -out: return nr_moved; + +out_balanced: + schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); + sd->nr_balance_failed = 0; + return 0; } /* -- cgit v1.2.3 From 3950745131e23472fb5ace2ee4a2093e7590ec69 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:09 -0700 Subject: [PATCH] sched: fix SMT scheduling problems SMT balancing has a couple of problems. Firstly, active_load_balance is too complex - basically it should be a dumb helper for when the periodic balancer has determined there is an imbalance, but gets stuck because the task is running. So rip out all its "smarts", and just make it move one task to the target CPU. Second, the busy CPU's sched-domain tree was being used for active balancing. This means that it may not see that nr_balance_failed has reached a critical level. So use the target CPU's sched-domain tree for this. We can do this because we hold its runqueue lock. Lastly, reset nr_balance_failed to a point where we allow cache hot migration. 
This will help ensure active load balancing is successful. Thanks to Suresh Siddha for pointing out these issues. Signed-off-by: Nick Piggin Signed-off-by: Suresh Siddha Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 76 ++++++++++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 03d737791c1..41e69b5ee65 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1995,7 +1995,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, * We've kicked active balancing, reset the failure * counter. */ - sd->nr_balance_failed = sd->cache_nice_tries; + sd->nr_balance_failed = sd->cache_nice_tries+1; } } else sd->nr_balance_failed = 0; @@ -2106,56 +2106,42 @@ static inline void idle_balance(int this_cpu, runqueue_t *this_rq) static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) { struct sched_domain *sd; - struct sched_group *cpu_group; runqueue_t *target_rq; - cpumask_t visited_cpus; - int cpu; + int target_cpu = busiest_rq->push_cpu; + + if (busiest_rq->nr_running <= 1) + /* no task to move */ + return; + + target_rq = cpu_rq(target_cpu); /* - * Search for suitable CPUs to push tasks to in successively higher - * domains with SD_LOAD_BALANCE set. + * This condition is "impossible", if it occurs + * we need to fix it. Originally reported by + * Bjorn Helgaas on a 128-cpu setup. */ - visited_cpus = CPU_MASK_NONE; - for_each_domain(busiest_cpu, sd) { - if (!(sd->flags & SD_LOAD_BALANCE)) - /* no more domains to search */ - break; + BUG_ON(busiest_rq == target_rq); - schedstat_inc(sd, alb_cnt); + /* move a task from busiest_rq to target_rq */ + double_lock_balance(busiest_rq, target_rq); - cpu_group = sd->groups; - do { - for_each_cpu_mask(cpu, cpu_group->cpumask) { - if (busiest_rq->nr_running <= 1) - /* no more tasks left to move */ - return; - if (cpu_isset(cpu, visited_cpus)) - continue; - cpu_set(cpu, visited_cpus); - if (!cpu_and_siblings_are_idle(cpu) || cpu == busiest_cpu) - continue; - - target_rq = cpu_rq(cpu); - /* - * This condition is "impossible", if it occurs - * we need to fix it. Originally reported by - * Bjorn Helgaas on a 128-cpu setup. - */ - BUG_ON(busiest_rq == target_rq); - - /* move a task from busiest_rq to target_rq */ - double_lock_balance(busiest_rq, target_rq); - if (move_tasks(target_rq, cpu, busiest_rq, - 1, sd, SCHED_IDLE, NULL)) { - schedstat_inc(sd, alb_pushed); - } else { - schedstat_inc(sd, alb_failed); - } - spin_unlock(&target_rq->lock); - } - cpu_group = cpu_group->next; - } while (cpu_group != sd->groups); - } + /* Search for an sd spanning us and the target CPU. */ + for_each_domain(target_cpu, sd) + if ((sd->flags & SD_LOAD_BALANCE) && + cpu_isset(busiest_cpu, sd->span)) + break; + + if (unlikely(sd == NULL)) + goto out; + + schedstat_inc(sd, alb_cnt); + + if (move_tasks(target_rq, target_cpu, busiest_rq, 1, sd, SCHED_IDLE, NULL)) + schedstat_inc(sd, alb_pushed); + else + schedstat_inc(sd, alb_failed); +out: + spin_unlock(&target_rq->lock); } /* -- cgit v1.2.3 From db935dbd43c4290d710304662cc908f733afea06 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:11 -0700 Subject: [PATCH] sched: add debugging These conditions should now be impossible, and we need to fix them if they happen. 
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 41e69b5ee65..8b035a8b3c3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1942,15 +1942,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, goto out_balanced; } - /* - * This should be "impossible", but since load - * balancing is inherently racy and statistical, - * it could happen in theory. - */ - if (unlikely(busiest == this_rq)) { - WARN_ON(1); - goto out_balanced; - } + BUG_ON(busiest == this_rq); schedstat_add(sd, lb_imbalance[idle], imbalance); @@ -2052,11 +2044,13 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, } busiest = find_busiest_queue(group); - if (!busiest || busiest == this_rq) { + if (!busiest) { schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); goto out_balanced; } + BUG_ON(busiest == this_rq); + /* Attempt to move tasks */ double_lock_balance(this_rq, busiest); -- cgit v1.2.3 From 99b61ccf0bf0e9a85823d39a5db6a1519caeb13d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:12 -0700 Subject: [PATCH] sched: less aggressive idle balancing Remove the special casing for idle CPU balancing. Things like this are hurting for example on SMT, where are single sibling being idle doesn't really warrant a really aggressive pull over the NUMA domain, for example. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 8b035a8b3c3..f665de34ed8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1877,15 +1877,9 @@ nextgroup: /* Get rid of the scaling factor, rounding down as we divide */ *imbalance = *imbalance / SCHED_LOAD_SCALE; - return busiest; out_balanced: - if (busiest && (idle == NEWLY_IDLE || - (idle == SCHED_IDLE && max_load > SCHED_LOAD_SCALE)) ) { - *imbalance = 1; - return busiest; - } *imbalance = 0; return NULL; -- cgit v1.2.3 From 7897986bad8f6cd50d6149345aca7f6480f49464 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:13 -0700 Subject: [PATCH] sched: balance timers Do CPU load averaging over a number of different intervals. Allow each interval to be chosen by sending a parameter to source_load and target_load. 0 is instantaneous, idx > 0 returns a decaying average with the most recent sample weighted at 2^(idx-1). To a maximum of 3 (could be easily increased). So generally a higher number will result in more conservative balancing. 
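For clarity, here is the update rule in isolation; a minimal sketch of the decaying average that the rebalance_tick() hunk below implements (the helper name is invented for illustration):

	/*
	 * Decaying CPU load average, one value per index.
	 * idx == 0 degenerates to the instantaneous load; larger idx
	 * weights history more heavily and so reacts more slowly.
	 */
	static unsigned long decayed_cpu_load(unsigned long old_load,
					      unsigned long new_load, int idx)
	{
		unsigned long scale = 1UL << idx;

		/* round up when load is increasing so the average cannot
		   get stuck just below a steady value (e.g. 9 vs 10) */
		if (new_load > old_load)
			new_load += scale - 1;

		return (old_load * (scale - 1) + new_load) / scale;
	}
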
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/topology.h | 4 ++ include/asm-x86_64/topology.h | 6 +- include/linux/sched.h | 4 ++ include/linux/topology.h | 8 +++ kernel/sched.c | 138 ++++++++++++++++++++++-------------------- 5 files changed, 95 insertions(+), 65 deletions(-) diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index 6d0f67507b2..0055fbfeec7 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -74,6 +74,10 @@ static inline int node_to_first_cpu(int node) .imbalance_pct = 125, \ .cache_hot_time = (10*1000000), \ .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 8f77e9f6bc2..fe8d80a1575 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -39,7 +39,11 @@ extern int __node_distance(int, int); .busy_factor = 32, \ .imbalance_pct = 125, \ .cache_hot_time = (10*1000000), \ - .cache_nice_tries = 1, \ + .cache_nice_tries = 2, \ + .busy_idx = 3, \ + .idle_idx = 2, \ + .newidle_idx = 1, \ + .wake_idx = 1, \ .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c69682b044..664981ac1fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -488,6 +488,10 @@ struct sched_domain { unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; + unsigned int wake_idx; int flags; /* See SD_* */ /* Runtime fields. */ diff --git a/include/linux/topology.h b/include/linux/topology.h index d70e8972c67..ae9c2216dfa 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -89,6 +89,10 @@ .cache_hot_time = 0, \ .cache_nice_tries = 0, \ .per_cpu_gain = 25, \ + .busy_idx = 0, \ + .idle_idx = 0, \ + .newidle_idx = 0, \ + .wake_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ @@ -115,6 +119,10 @@ .cache_hot_time = (5*1000000/2), \ .cache_nice_tries = 1, \ .per_cpu_gain = 100, \ + .busy_idx = 2, \ + .idle_idx = 0, \ + .newidle_idx = 1, \ + .wake_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ diff --git a/kernel/sched.c b/kernel/sched.c index f665de34ed8..b597b07e791 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -206,7 +206,7 @@ struct runqueue { */ unsigned long nr_running; #ifdef CONFIG_SMP - unsigned long cpu_load; + unsigned long cpu_load[3]; #endif unsigned long long nr_switches; @@ -886,23 +886,27 @@ void kick_process(task_t *p) * We want to under-estimate the load of migration sources, to * balance conservatively. 
*/ -static inline unsigned long source_load(int cpu) +static inline unsigned long source_load(int cpu, int type) { runqueue_t *rq = cpu_rq(cpu); unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + if (type == 0) + return load_now; - return min(rq->cpu_load, load_now); + return min(rq->cpu_load[type-1], load_now); } /* * Return a high guess at the load of a migration-target cpu */ -static inline unsigned long target_load(int cpu) +static inline unsigned long target_load(int cpu, int type) { runqueue_t *rq = cpu_rq(cpu); unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + if (type == 0) + return load_now; - return max(rq->cpu_load, load_now); + return max(rq->cpu_load[type-1], load_now); } #endif @@ -967,7 +971,7 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync) runqueue_t *rq; #ifdef CONFIG_SMP unsigned long load, this_load; - struct sched_domain *sd; + struct sched_domain *sd, *this_sd = NULL; int new_cpu; #endif @@ -986,72 +990,64 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync) if (unlikely(task_running(rq, p))) goto out_activate; -#ifdef CONFIG_SCHEDSTATS + new_cpu = cpu; + schedstat_inc(rq, ttwu_cnt); if (cpu == this_cpu) { schedstat_inc(rq, ttwu_local); - } else { - for_each_domain(this_cpu, sd) { - if (cpu_isset(cpu, sd->span)) { - schedstat_inc(sd, ttwu_wake_remote); - break; - } + goto out_set_cpu; + } + + for_each_domain(this_cpu, sd) { + if (cpu_isset(cpu, sd->span)) { + schedstat_inc(sd, ttwu_wake_remote); + this_sd = sd; + break; } } -#endif - new_cpu = cpu; - if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) + if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) goto out_set_cpu; - load = source_load(cpu); - this_load = target_load(this_cpu); - /* - * If sync wakeup then subtract the (maximum possible) effect of - * the currently running task from the load of the current CPU: + * Check for affine wakeup and passive balancing possibilities. */ - if (sync) - this_load -= SCHED_LOAD_SCALE; - - /* Don't pull the task off an idle CPU to a busy one */ - if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2) - goto out_set_cpu; + if (this_sd) { + int idx = this_sd->wake_idx; + unsigned int imbalance; - new_cpu = this_cpu; /* Wake to this CPU if we can */ + load = source_load(cpu, idx); + this_load = target_load(this_cpu, idx); - /* - * Scan domains for affine wakeup and passive balancing - * possibilities. - */ - for_each_domain(this_cpu, sd) { - unsigned int imbalance; /* - * Start passive balancing when half the imbalance_pct - * limit is reached. + * If sync wakeup then subtract the (maximum possible) effect of + * the currently running task from the load of the current CPU: */ - imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2; + if (sync) + this_load -= SCHED_LOAD_SCALE; + + /* Don't pull the task off an idle CPU to a busy one */ + if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2) + goto out_set_cpu; - if ((sd->flags & SD_WAKE_AFFINE) && - !task_hot(p, rq->timestamp_last_tick, sd)) { + new_cpu = this_cpu; /* Wake to this CPU if we can */ + + if ((this_sd->flags & SD_WAKE_AFFINE) && + !task_hot(p, rq->timestamp_last_tick, this_sd)) { /* * This domain has SD_WAKE_AFFINE and p is cache cold * in this domain. 
*/ - if (cpu_isset(cpu, sd->span)) { - schedstat_inc(sd, ttwu_move_affine); - goto out_set_cpu; - } - } else if ((sd->flags & SD_WAKE_BALANCE) && + schedstat_inc(this_sd, ttwu_move_affine); + goto out_set_cpu; + } else if ((this_sd->flags & SD_WAKE_BALANCE) && imbalance*this_load <= 100*load) { /* * This domain has SD_WAKE_BALANCE and there is * an imbalance. */ - if (cpu_isset(cpu, sd->span)) { - schedstat_inc(sd, ttwu_move_balance); - goto out_set_cpu; - } + schedstat_inc(this_sd, ttwu_move_balance); + goto out_set_cpu; } } @@ -1509,7 +1505,7 @@ static int find_idlest_cpu(struct task_struct *p, int this_cpu, cpus_and(mask, sd->span, p->cpus_allowed); for_each_cpu_mask(i, mask) { - load = target_load(i); + load = target_load(i, sd->wake_idx); if (load < min_load) { min_cpu = i; @@ -1522,7 +1518,7 @@ static int find_idlest_cpu(struct task_struct *p, int this_cpu, } /* add +1 to account for the new task */ - this_load = source_load(this_cpu) + SCHED_LOAD_SCALE; + this_load = source_load(this_cpu, sd->wake_idx) + SCHED_LOAD_SCALE; /* * Would with the addition of the new task to the @@ -1767,8 +1763,15 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; + int load_idx; max_load = this_load = total_load = total_pwr = 0; + if (idle == NOT_IDLE) + load_idx = sd->busy_idx; + else if (idle == NEWLY_IDLE) + load_idx = sd->newidle_idx; + else + load_idx = sd->idle_idx; do { unsigned long load; @@ -1783,9 +1786,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, for_each_cpu_mask(i, group->cpumask) { /* Bias balancing toward cpus of our domain */ if (local_group) - load = target_load(i); + load = target_load(i, load_idx); else - load = source_load(i); + load = source_load(i, load_idx); avg_load += load; } @@ -1895,7 +1898,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group) int i; for_each_cpu_mask(i, group->cpumask) { - load = source_load(i); + load = source_load(i, 0); if (load > max_load) { max_load = load; @@ -2150,18 +2153,23 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, unsigned long old_load, this_load; unsigned long j = jiffies + CPU_OFFSET(this_cpu); struct sched_domain *sd; + int i; - /* Update our load */ - old_load = this_rq->cpu_load; this_load = this_rq->nr_running * SCHED_LOAD_SCALE; - /* - * Round up the averaging division if load is increasing. This - * prevents us from getting stuck on 9 if the load is 10, for - * example. - */ - if (this_load > old_load) - old_load++; - this_rq->cpu_load = (old_load + this_load) / 2; + /* Update our load */ + for (i = 0; i < 3; i++) { + unsigned long new_load = this_load; + int scale = 1 << i; + old_load = this_rq->cpu_load[i]; + /* + * Round up the averaging division if load is increasing. This + * prevents us from getting stuck on 9 if the load is 10, for + * example. 
+ */ + if (new_load > old_load) + new_load += scale-1; + this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; + } for_each_domain(this_cpu, sd) { unsigned long interval; @@ -4921,13 +4929,15 @@ void __init sched_init(void) rq = cpu_rq(i); spin_lock_init(&rq->lock); + rq->nr_running = 0; rq->active = rq->arrays; rq->expired = rq->arrays + 1; rq->best_expired_prio = MAX_PRIO; #ifdef CONFIG_SMP rq->sd = &sched_domain_dummy; - rq->cpu_load = 0; + for (j = 1; j < 3; j++) + rq->cpu_load[j] = 0; rq->active_balance = 0; rq->push_cpu = 0; rq->migration_thread = NULL; -- cgit v1.2.3 From a3f21bce1fefdf92a4d1705e888d390b10f3ac6f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:15 -0700 Subject: [PATCH] sched: tweak affine wakeups Do less affine wakeups. We're trying to reduce dbt2-pgsql idle time regressions here... make sure we don't don't move tasks the wrong way in an imbalance condition. Also, remove the cache coldness requirement from the calculation - this seems to induce sharp cutoff points where behaviour will suddenly change on some workloads if the load creeps slightly over or under some point. It is good for periodic balancing because in that case have otherwise have no other context to determine what task to move. But also make a minor tweak to "wake balancing" - the imbalance tolerance is now set at half the domain's imbalance, so we get the opportunity to do wake balancing before the more random periodic rebalancing gets preformed. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 57 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index b597b07e791..5ae3568eed0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1016,38 +1016,45 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync) int idx = this_sd->wake_idx; unsigned int imbalance; + imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; + load = source_load(cpu, idx); this_load = target_load(this_cpu, idx); - /* - * If sync wakeup then subtract the (maximum possible) effect of - * the currently running task from the load of the current CPU: - */ - if (sync) - this_load -= SCHED_LOAD_SCALE; - - /* Don't pull the task off an idle CPU to a busy one */ - if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2) - goto out_set_cpu; - new_cpu = this_cpu; /* Wake to this CPU if we can */ - if ((this_sd->flags & SD_WAKE_AFFINE) && - !task_hot(p, rq->timestamp_last_tick, this_sd)) { - /* - * This domain has SD_WAKE_AFFINE and p is cache cold - * in this domain. - */ - schedstat_inc(this_sd, ttwu_move_affine); - goto out_set_cpu; - } else if ((this_sd->flags & SD_WAKE_BALANCE) && - imbalance*this_load <= 100*load) { + if (this_sd->flags & SD_WAKE_AFFINE) { + unsigned long tl = this_load; /* - * This domain has SD_WAKE_BALANCE and there is - * an imbalance. + * If sync wakeup then subtract the (maximum possible) + * effect of the currently running task from the load + * of the current CPU: */ - schedstat_inc(this_sd, ttwu_move_balance); - goto out_set_cpu; + if (sync) + tl -= SCHED_LOAD_SCALE; + + if ((tl <= load && + tl + target_load(cpu, idx) <= SCHED_LOAD_SCALE) || + 100*(tl + SCHED_LOAD_SCALE) <= imbalance*load) { + /* + * This domain has SD_WAKE_AFFINE and + * p is cache cold in this domain, and + * there is no bad imbalance. 
+ */ + schedstat_inc(this_sd, ttwu_move_affine); + goto out_set_cpu; + } + } + + /* + * Start passive balancing when half the imbalance_pct + * limit is reached. + */ + if (this_sd->flags & SD_WAKE_BALANCE) { + if (imbalance*this_load <= 100*load) { + schedstat_inc(this_sd, ttwu_move_balance); + goto out_set_cpu; + } } } -- cgit v1.2.3 From cafb20c1f9976a70d633bb1e1c8c24eab00e4e80 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:17 -0700 Subject: [PATCH] sched: no aggressive idle balancing Remove the very aggressive idle stuff that has recently gone into 2.6 - it is going against the direction we are trying to go. Hopefully we can regain performance through other methods. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/topology.h | 1 - include/asm-x86_64/topology.h | 1 - include/linux/topology.h | 1 - kernel/sched.c | 21 ++------------------- 4 files changed, 2 insertions(+), 22 deletions(-) diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index 0055fbfeec7..5eb6f61dcef 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -82,7 +82,6 @@ static inline int node_to_first_cpu(int node) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ - | SD_WAKE_IDLE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index fe8d80a1575..9cb7459ce72 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -48,7 +48,6 @@ extern int __node_distance(int, int); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ - | SD_WAKE_IDLE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/linux/topology.h b/include/linux/topology.h index ae9c2216dfa..b23ec64df7f 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -127,7 +127,6 @@ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ - | SD_WAKE_IDLE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/kernel/sched.c b/kernel/sched.c index 5ae3568eed0..396724a2519 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -414,22 +414,6 @@ static inline runqueue_t *this_rq_lock(void) return rq; } -#ifdef CONFIG_SCHED_SMT -static int cpu_and_siblings_are_idle(int cpu) -{ - int sib; - for_each_cpu_mask(sib, cpu_sibling_map[cpu]) { - if (idle_cpu(sib)) - continue; - return 0; - } - - return 1; -} -#else -#define cpu_and_siblings_are_idle(A) idle_cpu(A) -#endif - #ifdef CONFIG_SCHEDSTATS /* * Called when a process is dequeued from the active array and given @@ -1652,12 +1636,11 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, /* * Aggressive migration if: - * 1) the [whole] cpu is idle, or + * 1) task is cache cold, or * 2) too many balance attempts have failed. */ - if (cpu_and_siblings_are_idle(this_cpu) || \ - sd->nr_balance_failed > sd->cache_nice_tries) + if (sd->nr_balance_failed > sd->cache_nice_tries) return 1; if (task_hot(p, rq->timestamp_last_tick, sd)) -- cgit v1.2.3 From 147cbb4bbe991452698f0772d8292f22825710ba Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:19 -0700 Subject: [PATCH] sched: balance on fork Reimplement the balance on exec balancing to be sched-domains aware. Use this to also do balance on fork balancing. Make x86_64 do balance on fork over the NUMA domain. 
The problem that the non sched domains aware blancing became apparent on dual core, multi socket opterons. What we want is for the new tasks to be sent to a different socket, but more often than not, we would first load up our sibling core, or fill two cores of a single remote socket before selecting a new one. This gives large improvements to STREAM on such systems. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-x86_64/topology.h | 2 + include/linux/sched.h | 10 +-- include/linux/topology.h | 2 + kernel/sched.c | 164 ++++++++++++++++++++++++++++-------------- 4 files changed, 119 insertions(+), 59 deletions(-) diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 9cb7459ce72..802d09b9c99 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -44,9 +44,11 @@ extern int __node_distance(int, int); .idle_idx = 2, \ .newidle_idx = 1, \ .wake_idx = 1, \ + .forkexec_idx = 1, \ .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 664981ac1fb..613491d3a87 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -460,10 +460,11 @@ enum idle_type #define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ #define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */ #define SD_BALANCE_EXEC 4 /* Balance on exec */ -#define SD_WAKE_IDLE 8 /* Wake to idle CPU on task wakeup */ -#define SD_WAKE_AFFINE 16 /* Wake task to waking CPU */ -#define SD_WAKE_BALANCE 32 /* Perform balancing at task wakeup */ -#define SD_SHARE_CPUPOWER 64 /* Domain members share cpu power */ +#define SD_BALANCE_FORK 8 /* Balance on fork, clone */ +#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */ +#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */ +#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */ +#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ struct sched_group { struct sched_group *next; /* Must be a circular list */ @@ -492,6 +493,7 @@ struct sched_domain { unsigned int idle_idx; unsigned int newidle_idx; unsigned int wake_idx; + unsigned int forkexec_idx; int flags; /* See SD_* */ /* Runtime fields. */ diff --git a/include/linux/topology.h b/include/linux/topology.h index b23ec64df7f..665597207de 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -93,6 +93,7 @@ .idle_idx = 0, \ .newidle_idx = 0, \ .wake_idx = 0, \ + .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ @@ -123,6 +124,7 @@ .idle_idx = 0, \ .newidle_idx = 1, \ .wake_idx = 1, \ + .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ diff --git a/kernel/sched.c b/kernel/sched.c index 396724a2519..7ecc237e2aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -893,6 +893,79 @@ static inline unsigned long target_load(int cpu, int type) return max(rq->cpu_load[type-1], load_now); } +/* + * find_idlest_group finds and returns the least busy CPU group within the + * domain. 
+ */ +static struct sched_group * +find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) +{ + struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; + unsigned long min_load = ULONG_MAX, this_load = 0; + int load_idx = sd->forkexec_idx; + int imbalance = 100 + (sd->imbalance_pct-100)/2; + + do { + unsigned long load, avg_load; + int local_group; + int i; + + local_group = cpu_isset(this_cpu, group->cpumask); + /* XXX: put a cpus allowed check */ + + /* Tally up the load of all CPUs in the group */ + avg_load = 0; + + for_each_cpu_mask(i, group->cpumask) { + /* Bias balancing toward cpus of our domain */ + if (local_group) + load = source_load(i, load_idx); + else + load = target_load(i, load_idx); + + avg_load += load; + } + + /* Adjust by relative CPU power of the group */ + avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; + + if (local_group) { + this_load = avg_load; + this = group; + } else if (avg_load < min_load) { + min_load = avg_load; + idlest = group; + } + group = group->next; + } while (group != sd->groups); + + if (!idlest || 100*this_load < imbalance*min_load) + return NULL; + return idlest; +} + +/* + * find_idlest_queue - find the idlest runqueue among the cpus in group. + */ +static int find_idlest_cpu(struct sched_group *group, int this_cpu) +{ + unsigned long load, min_load = ULONG_MAX; + int idlest = -1; + int i; + + for_each_cpu_mask(i, group->cpumask) { + load = source_load(i, 0); + + if (load < min_load || (load == min_load && i == this_cpu)) { + min_load = load; + idlest = i; + } + } + + return idlest; +} + + #endif /* @@ -1107,11 +1180,6 @@ int fastcall wake_up_state(task_t *p, unsigned int state) return try_to_wake_up(p, state, 0); } -#ifdef CONFIG_SMP -static int find_idlest_cpu(struct task_struct *p, int this_cpu, - struct sched_domain *sd); -#endif - /* * Perform scheduler related setup for a newly forked process p. * p is forked by current. @@ -1181,12 +1249,38 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) unsigned long flags; int this_cpu, cpu; runqueue_t *rq, *this_rq; +#ifdef CONFIG_SMP + struct sched_domain *tmp, *sd = NULL; +#endif rq = task_rq_lock(p, &flags); - cpu = task_cpu(p); + BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); + cpu = task_cpu(p); - BUG_ON(p->state != TASK_RUNNING); +#ifdef CONFIG_SMP + for_each_domain(cpu, tmp) + if (tmp->flags & SD_BALANCE_FORK) + sd = tmp; + + if (sd) { + struct sched_group *group; + + cpu = task_cpu(p); + group = find_idlest_group(sd, p, cpu); + if (group) { + int new_cpu; + new_cpu = find_idlest_cpu(group, cpu); + if (new_cpu != -1 && new_cpu != cpu && + cpu_isset(new_cpu, p->cpus_allowed)) { + set_task_cpu(p, new_cpu); + task_rq_unlock(rq, &flags); + rq = task_rq_lock(p, &flags); + cpu = task_cpu(p); + } + } + } +#endif /* * We decrease the sleep average of forking parents @@ -1480,51 +1574,6 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) } } -/* - * find_idlest_cpu - find the least busy runqueue. 
- */ -static int find_idlest_cpu(struct task_struct *p, int this_cpu, - struct sched_domain *sd) -{ - unsigned long load, min_load, this_load; - int i, min_cpu; - cpumask_t mask; - - min_cpu = UINT_MAX; - min_load = ULONG_MAX; - - cpus_and(mask, sd->span, p->cpus_allowed); - - for_each_cpu_mask(i, mask) { - load = target_load(i, sd->wake_idx); - - if (load < min_load) { - min_cpu = i; - min_load = load; - - /* break out early on an idle CPU: */ - if (!min_load) - break; - } - } - - /* add +1 to account for the new task */ - this_load = source_load(this_cpu, sd->wake_idx) + SCHED_LOAD_SCALE; - - /* - * Would with the addition of the new task to the - * current CPU there be an imbalance between this - * CPU and the idlest CPU? - * - * Use half of the balancing threshold - new-context is - * a good opportunity to balance. - */ - if (min_load*(100 + (sd->imbalance_pct-100)/2) < this_load*100) - return min_cpu; - - return this_cpu; -} - /* * If dest_cpu is allowed for this process, migrate the task to it. * This is accomplished by forcing the cpu_allowed mask to only @@ -1578,8 +1627,15 @@ void sched_exec(void) sd = tmp; if (sd) { + struct sched_group *group; schedstat_inc(sd, sbe_attempts); - new_cpu = find_idlest_cpu(current, this_cpu, sd); + group = find_idlest_group(sd, current, this_cpu); + if (!group) + goto out; + new_cpu = find_idlest_cpu(group, this_cpu); + if (new_cpu == -1) + goto out; + if (new_cpu != this_cpu) { schedstat_inc(sd, sbe_pushed); put_cpu(); @@ -1792,12 +1848,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (local_group) { this_load = avg_load; this = group; - goto nextgroup; } else if (avg_load > max_load) { max_load = avg_load; busiest = group; } -nextgroup: group = group->next; } while (group != sd->groups); -- cgit v1.2.3 From 68767a0ae428801649d510d9a65bb71feed44dd1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:20 -0700 Subject: [PATCH] sched: schedstats update for balance on fork Add SCHEDSTAT statistics for sched-balance-fork. 
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 ++++++-- kernel/sched.c | 63 +++++++++++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 613491d3a87..36a10781c3f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -517,10 +517,16 @@ struct sched_domain { unsigned long alb_failed; unsigned long alb_pushed; - /* sched_balance_exec() stats */ - unsigned long sbe_attempts; + /* SD_BALANCE_EXEC stats */ + unsigned long sbe_cnt; + unsigned long sbe_balanced; unsigned long sbe_pushed; + /* SD_BALANCE_FORK stats */ + unsigned long sbf_cnt; + unsigned long sbf_balanced; + unsigned long sbf_pushed; + /* try_to_wake_up() stats */ unsigned long ttwu_wake_remote; unsigned long ttwu_move_affine; diff --git a/kernel/sched.c b/kernel/sched.c index 7ecc237e2aa..2711130cd97 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -309,7 +309,7 @@ static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) * bump this up when changing the output format or the meaning of an existing * format, so that tools can adapt (or abort) */ -#define SCHEDSTAT_VERSION 11 +#define SCHEDSTAT_VERSION 12 static int show_schedstat(struct seq_file *seq, void *v) { @@ -356,9 +356,10 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->lb_nobusyq[itype], sd->lb_nobusyg[itype]); } - seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu\n", + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", sd->alb_cnt, sd->alb_failed, sd->alb_pushed, - sd->sbe_pushed, sd->sbe_attempts, + sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, + sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); } #endif @@ -1264,24 +1265,34 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) sd = tmp; if (sd) { + int new_cpu; struct sched_group *group; + schedstat_inc(sd, sbf_cnt); cpu = task_cpu(p); group = find_idlest_group(sd, p, cpu); - if (group) { - int new_cpu; - new_cpu = find_idlest_cpu(group, cpu); - if (new_cpu != -1 && new_cpu != cpu && - cpu_isset(new_cpu, p->cpus_allowed)) { - set_task_cpu(p, new_cpu); - task_rq_unlock(rq, &flags); - rq = task_rq_lock(p, &flags); - cpu = task_cpu(p); - } + if (!group) { + schedstat_inc(sd, sbf_balanced); + goto no_forkbalance; + } + + new_cpu = find_idlest_cpu(group, cpu); + if (new_cpu == -1 || new_cpu == cpu) { + schedstat_inc(sd, sbf_balanced); + goto no_forkbalance; + } + + if (cpu_isset(new_cpu, p->cpus_allowed)) { + schedstat_inc(sd, sbf_pushed); + set_task_cpu(p, new_cpu); + task_rq_unlock(rq, &flags); + rq = task_rq_lock(p, &flags); + cpu = task_cpu(p); } } -#endif +no_forkbalance: +#endif /* * We decrease the sleep average of forking parents * and children as well, to keep max-interactive tasks @@ -1618,30 +1629,28 @@ void sched_exec(void) struct sched_domain *tmp, *sd = NULL; int new_cpu, this_cpu = get_cpu(); - /* Prefer the current CPU if there's only this task running */ - if (this_rq()->nr_running <= 1) - goto out; - for_each_domain(this_cpu, tmp) if (tmp->flags & SD_BALANCE_EXEC) sd = tmp; if (sd) { struct sched_group *group; - schedstat_inc(sd, sbe_attempts); + schedstat_inc(sd, sbe_cnt); group = find_idlest_group(sd, current, this_cpu); - if (!group) + if (!group) { + schedstat_inc(sd, sbe_balanced); goto out; + } new_cpu = find_idlest_cpu(group, this_cpu); - if (new_cpu == -1) + if (new_cpu == -1 || 
new_cpu == this_cpu) { + schedstat_inc(sd, sbe_balanced); goto out; - - if (new_cpu != this_cpu) { - schedstat_inc(sd, sbe_pushed); - put_cpu(); - sched_migrate_task(current, new_cpu); - return; } + + schedstat_inc(sd, sbe_pushed); + put_cpu(); + sched_migrate_task(current, new_cpu); + return; } out: put_cpu(); -- cgit v1.2.3 From 687f1661d302bc70ce906594a6d3f615ef075a50 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:21 -0700 Subject: [PATCH] sched: sched tuning Do some basic initial tuning. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/domain.c | 2 +- include/asm-i386/topology.h | 2 +- include/asm-ia64/topology.h | 61 +++++++++++++++++++++++++++++++++---------- include/asm-x86_64/topology.h | 3 +-- include/linux/topology.h | 11 ++++---- 5 files changed, 55 insertions(+), 24 deletions(-) diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c index fe532c97043..afbde79c3b3 100644 --- a/arch/ia64/kernel/domain.c +++ b/arch/ia64/kernel/domain.c @@ -14,7 +14,7 @@ #include #include -#define SD_NODES_PER_DOMAIN 6 +#define SD_NODES_PER_DOMAIN 16 #ifdef CONFIG_NUMA /** diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index 5eb6f61dcef..2461b731781 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -81,7 +81,7 @@ static inline int node_to_first_cpu(int node) .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ - | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_FORK \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index 21cf351fd05..4e64c2a6b36 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -42,25 +42,54 @@ void build_cpu_to_node_map(void); +#define SD_CPU_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000000), \ + .per_cpu_gain = 100, \ + .cache_nice_tries = 2, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_AFFINE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} + /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .span = CPU_MASK_NONE, \ .parent = NULL, \ .groups = NULL, \ - .min_interval = 80, \ - .max_interval = 320, \ - .busy_factor = 320, \ + .min_interval = 8, \ + .max_interval = 8*(min(num_online_cpus(), 32)), \ + .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_hot_time = (10*1000000), \ - .cache_nice_tries = 1, \ + .cache_nice_tries = 2, \ + .busy_idx = 3, \ + .idle_idx = 2, \ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ - | SD_BALANCE_NEWIDLE \ - | SD_WAKE_IDLE \ + | SD_BALANCE_FORK \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ - .balance_interval = 1, \ + .balance_interval = 64, \ .nr_balance_failed = 0, \ } @@ -69,17 +98,21 @@ void build_cpu_to_node_map(void); .span = CPU_MASK_NONE, \ .parent = NULL, \ .groups = NULL, \ - .min_interval = 80, \ - .max_interval = 320, \ - .busy_factor = 320, \ - .imbalance_pct = 125, \ + .min_interval = 64, \ + .max_interval = 64*num_online_cpus(), \ + .busy_factor = 128, 
\ + .imbalance_pct = 133, \ .cache_hot_time = (10*1000000), \ .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 3, \ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 0, /* unused */ \ + .forkexec_idx = 0, /* unused */ \ .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC, \ + .flags = SD_LOAD_BALANCE, \ .last_balance = jiffies, \ - .balance_interval = 100*(63+num_online_cpus())/64, \ + .balance_interval = 64, \ .nr_balance_failed = 0, \ } diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 802d09b9c99..c1bc3fad482 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -42,12 +42,11 @@ extern int __node_distance(int, int); .cache_nice_tries = 2, \ .busy_idx = 3, \ .idle_idx = 2, \ - .newidle_idx = 1, \ + .newidle_idx = 0, \ .wake_idx = 1, \ .forkexec_idx = 1, \ .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ | SD_WAKE_BALANCE, \ diff --git a/include/linux/topology.h b/include/linux/topology.h index 665597207de..0320225e96d 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -91,7 +91,7 @@ .per_cpu_gain = 25, \ .busy_idx = 0, \ .idle_idx = 0, \ - .newidle_idx = 0, \ + .newidle_idx = 1, \ .wake_idx = 0, \ .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ @@ -121,15 +121,14 @@ .cache_nice_tries = 1, \ .per_cpu_gain = 100, \ .busy_idx = 2, \ - .idle_idx = 0, \ - .newidle_idx = 1, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ .wake_idx = 1, \ - .forkexec_idx = 0, \ + .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ -- cgit v1.2.3 From 48c08d3f8ff94fa118187e4d8d4a5707bb85e59d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Jun 2005 14:57:22 -0700 Subject: [PATCH] sched: uninline task_timeslice "Chen, Kenneth W" uninline task_timeslice() - reduces code footprint noticeably, and it's slowpath code. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index 2711130cd97..98bf1c091da 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -166,7 +166,7 @@ #define SCALE_PRIO(x, prio) \ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO/2), MIN_TIMESLICE) -static inline unsigned int task_timeslice(task_t *p) +static unsigned int task_timeslice(task_t *p) { if (p->static_prio < NICE_TO_PRIO(0)) return SCALE_PRIO(DEF_TIMESLICE*4, p->static_prio); -- cgit v1.2.3 From 4866cde064afbb6c2a488c265e696879de616daa Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:23 -0700 Subject: [PATCH] sched: cleanup context switch locking Instead of requiring architecture code to interact with the scheduler's locking implementation, provide a couple of defines that can be used by the architecture to request runqueue unlocked context switches, and ask for interrupts to be enabled over the context switch. Also replaces the "switch_lock" used by these architectures with an oncpu flag (note, not a potentially slow bitflag). This eliminates one bus locked memory operation when context switching, and simplifies the task_running function. 
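[Editor's note: the fragment below is an illustration added for this write-up, not part of the patch. It shows how an architecture opts in to the new scheme from its asm/system.h: defining one of the two macros is all that is required, and the generic code in kernel/sched.c then supplies the matching lock/irq handling and the oncpu-based task_running(). The examples mirror the sparc64 and ARM hunks in the diff below.]

/* sparc64-style: drop the runqueue lock across the switch, keep an arch hook */
#define __ARCH_WANT_UNLOCKED_CTXSW
#define prepare_arch_switch(next) \
do { \
	flushw_all(); \
} while (0)

/* ARM-style: switch_mm() may flush caches, so also keep interrupts enabled */
#define __ARCH_WANT_INTERRUPTS_ON_CTXSW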
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-arm/system.h | 30 ++-------- include/asm-ia64/system.h | 10 +--- include/asm-mips/system.h | 10 +--- include/asm-s390/system.h | 17 +----- include/asm-sparc/system.h | 4 +- include/asm-sparc64/system.h | 14 ++--- include/linux/init_task.h | 1 - include/linux/sched.h | 10 +++- kernel/sched.c | 132 +++++++++++++++++++++++++++++++++++-------- 9 files changed, 131 insertions(+), 97 deletions(-) diff --git a/include/asm-arm/system.h b/include/asm-arm/system.h index 39dd7008013..3d0d2860b6d 100644 --- a/include/asm-arm/system.h +++ b/include/asm-arm/system.h @@ -145,34 +145,12 @@ extern unsigned int user_debug; #define set_wmb(var, value) do { var = value; wmb(); } while (0) #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t"); -#ifdef CONFIG_SMP /* - * Define our own context switch locking. This allows us to enable - * interrupts over the context switch, otherwise we end up with high - * interrupt latency. The real problem area is switch_mm() which may - * do a full cache flush. + * switch_mm() may do a full cache flush over the context switch, + * so enable interrupts over the context switch to avoid high + * latency. */ -#define prepare_arch_switch(rq,next) \ -do { \ - spin_lock(&(next)->switch_lock); \ - spin_unlock_irq(&(rq)->lock); \ -} while (0) - -#define finish_arch_switch(rq,prev) \ - spin_unlock(&(prev)->switch_lock) - -#define task_running(rq,p) \ - ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) -#else -/* - * Our UP-case is more simple, but we assume knowledge of how - * spin_unlock_irq() and friends are implemented. This avoids - * us needlessly decrementing and incrementing the preempt count. - */ -#define prepare_arch_switch(rq,next) local_irq_enable() -#define finish_arch_switch(rq,prev) spin_unlock(&(rq)->lock) -#define task_running(rq,p) ((rq)->curr == (p)) -#endif +#define __ARCH_WANT_INTERRUPTS_ON_CTXSW /* * switch_to(prev, next) should switch from task `prev' to `next' diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 6f516e76d1f..cd2cf76b2db 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -183,8 +183,6 @@ do { \ #ifdef __KERNEL__ -#define prepare_to_switch() do { } while(0) - #ifdef CONFIG_IA32_SUPPORT # define IS_IA32_PROCESS(regs) (ia64_psr(regs)->is != 0) #else @@ -274,13 +272,7 @@ extern void ia64_load_extra (struct task_struct *task); * of that CPU which will not be released, because there we wait for the * tasklist_lock to become available. */ -#define prepare_arch_switch(rq, next) \ -do { \ - spin_lock(&(next)->switch_lock); \ - spin_unlock(&(rq)->lock); \ -} while (0) -#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock) -#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) +#define __ARCH_WANT_UNLOCKED_CTXSW #define ia64_platform_is(x) (strcmp(x, platform_name) == 0) diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h index 888fd890846..169f3d4265b 100644 --- a/include/asm-mips/system.h +++ b/include/asm-mips/system.h @@ -422,16 +422,10 @@ extern void __die_if_kernel(const char *, struct pt_regs *, const char *file, extern int stop_a_enabled; /* - * Taken from include/asm-ia64/system.h; prevents deadlock on SMP + * See include/asm-ia64/system.h; prevents deadlock on SMP * systems. 
*/ -#define prepare_arch_switch(rq, next) \ -do { \ - spin_lock(&(next)->switch_lock); \ - spin_unlock(&(rq)->lock); \ -} while (0) -#define finish_arch_switch(rq, prev) spin_unlock_irq(&(prev)->switch_lock) -#define task_running(rq, p) ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) +#define __ARCH_WANT_UNLOCKED_CTXSW #define arch_align_stack(x) (x) diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index e3cb3ce1d24..b4a9f05a93d 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -104,29 +104,18 @@ static inline void restore_access_regs(unsigned int *acrs) prev = __switch_to(prev,next); \ } while (0) -#define prepare_arch_switch(rq, next) do { } while(0) -#define task_running(rq, p) ((rq)->curr == (p)) - #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void account_user_vtime(struct task_struct *); extern void account_system_vtime(struct task_struct *); - -#define finish_arch_switch(rq, prev) do { \ - set_fs(current->thread.mm_segment); \ - spin_unlock(&(rq)->lock); \ - account_system_vtime(prev); \ - local_irq_enable(); \ -} while (0) - #else +#define account_system_vtime(prev) do { } while (0) +#endif #define finish_arch_switch(rq, prev) do { \ set_fs(current->thread.mm_segment); \ - spin_unlock_irq(&(rq)->lock); \ + account_system_vtime(prev); \ } while (0) -#endif - #define nop() __asm__ __volatile__ ("nop") #define xchg(ptr,x) \ diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 80cf20cfaee..898562ebe94 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h @@ -101,7 +101,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr, * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work) * XXX WTF is the above comment? Found in late teen 2.4.x. */ -#define prepare_arch_switch(rq, next) do { \ +#define prepare_arch_switch(next) do { \ __asm__ __volatile__( \ ".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \ "save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \ @@ -109,8 +109,6 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr, "save %sp, -0x40, %sp\n\t" \ "restore; restore; restore; restore; restore; restore; restore"); \ } while(0) -#define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) -#define task_running(rq, p) ((rq)->curr == (p)) /* Much care has gone into this code, do not touch it. * diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index fd12ca386f4..f9be2c5b4dc 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -139,19 +139,13 @@ extern void __flushw_user(void); #define flush_user_windows flushw_user #define flush_register_windows flushw_all -#define prepare_arch_switch(rq, next) \ -do { spin_lock(&(next)->switch_lock); \ - spin_unlock(&(rq)->lock); \ +/* Don't hold the runqueue lock over context switch */ +#define __ARCH_WANT_UNLOCKED_CTXSW +#define prepare_arch_switch(next) \ +do { \ flushw_all(); \ } while (0) -#define finish_arch_switch(rq, prev) \ -do { spin_unlock_irq(&(prev)->switch_lock); \ -} while (0) - -#define task_running(rq, p) \ - ((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock)) - /* See what happens when you design the chip correctly? 
* * We tell gcc we clobber all non-fixed-usage registers except diff --git a/include/linux/init_task.h b/include/linux/init_task.h index a6a8c1a38d5..03206a425d7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,7 +108,6 @@ extern struct group_info init_groups; .blocked = {{0}}, \ .alloc_lock = SPIN_LOCK_UNLOCKED, \ .proc_lock = SPIN_LOCK_UNLOCKED, \ - .switch_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ } diff --git a/include/linux/sched.h b/include/linux/sched.h index 36a10781c3f..d27be933742 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -368,6 +368,11 @@ struct signal_struct { #endif }; +/* Context switch must be unlocked if interrupts are to be enabled */ +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW +# define __ARCH_WANT_UNLOCKED_CTXSW +#endif + /* * Bits in flags field of signal_struct. */ @@ -594,6 +599,9 @@ struct task_struct { int lock_depth; /* BKL lock depth */ +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + int oncpu; +#endif int prio, static_prio; struct list_head run_list; prio_array_t *array; @@ -716,8 +724,6 @@ struct task_struct { spinlock_t alloc_lock; /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ spinlock_t proc_lock; -/* context-switch lock */ - spinlock_t switch_lock; /* journalling filesystem info */ void *journal_info; diff --git a/kernel/sched.c b/kernel/sched.c index 98bf1c091da..b1410577f9a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -268,14 +268,71 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -/* - * Default context-switch locking: - */ #ifndef prepare_arch_switch -# define prepare_arch_switch(rq, next) do { } while (0) -# define finish_arch_switch(rq, next) spin_unlock_irq(&(rq)->lock) -# define task_running(rq, p) ((rq)->curr == (p)) +# define prepare_arch_switch(next) do { } while (0) +#endif +#ifndef finish_arch_switch +# define finish_arch_switch(prev) do { } while (0) +#endif + +#ifndef __ARCH_WANT_UNLOCKED_CTXSW +static inline int task_running(runqueue_t *rq, task_t *p) +{ + return rq->curr == p; +} + +static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +{ +} + +static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +{ + spin_unlock_irq(&rq->lock); +} + +#else /* __ARCH_WANT_UNLOCKED_CTXSW */ +static inline int task_running(runqueue_t *rq, task_t *p) +{ +#ifdef CONFIG_SMP + return p->oncpu; +#else + return rq->curr == p; +#endif +} + +static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +{ +#ifdef CONFIG_SMP + /* + * We can optimise this out completely for !SMP, because the + * SMP rebalancing from interrupt is the only thing that cares + * here. + */ + next->oncpu = 1; +#endif +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + spin_unlock_irq(&rq->lock); +#else + spin_unlock(&rq->lock); #endif +} + +static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +{ +#ifdef CONFIG_SMP + /* + * After ->oncpu is cleared, the task can be moved to a different CPU. + * We must ensure this doesn't happen until the switch is completely + * finished. 
+ */ + smp_wmb(); + prev->oncpu = 0; +#endif +#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_enable(); +#endif +} +#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ /* * task_rq_lock - lock the runqueue a given task resides on and disable @@ -1196,17 +1253,14 @@ void fastcall sched_fork(task_t *p) p->state = TASK_RUNNING; INIT_LIST_HEAD(&p->run_list); p->array = NULL; - spin_lock_init(&p->switch_lock); #ifdef CONFIG_SCHEDSTATS memset(&p->sched_info, 0, sizeof(p->sched_info)); #endif +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + p->oncpu = 0; +#endif #ifdef CONFIG_PREEMPT - /* - * During context-switch we hold precisely one spinlock, which - * schedule_tail drops. (in the common case it's this_rq()->lock, - * but it also can be p->switch_lock.) So we compensate with a count - * of 1. Also, we want to start with kernel preemption disabled. - */ + /* Want to start with kernel preemption disabled. */ p->thread_info->preempt_count = 1; #endif /* @@ -1387,23 +1441,41 @@ void fastcall sched_exit(task_t * p) task_rq_unlock(rq, &flags); } +/** + * prepare_task_switch - prepare to switch tasks + * @rq: the runqueue preparing to switch + * @next: the task we are going to switch to. + * + * This is called with the rq lock held and interrupts off. It must + * be paired with a subsequent finish_task_switch after the context + * switch. + * + * prepare_task_switch sets up locking and calls architecture specific + * hooks. + */ +static inline void prepare_task_switch(runqueue_t *rq, task_t *next) +{ + prepare_lock_switch(rq, next); + prepare_arch_switch(next); +} + /** * finish_task_switch - clean up after a task-switch * @prev: the thread we just switched away from. * - * We enter this with the runqueue still locked, and finish_arch_switch() - * will unlock it along with doing any other architecture-specific cleanup - * actions. + * finish_task_switch must be called after the context switch, paired + * with a prepare_task_switch call before the context switch. + * finish_task_switch will reconcile locking set up by prepare_task_switch, + * and do any other architecture-specific cleanup actions. * * Note that we may have delayed dropping an mm in context_switch(). If * so, we finish that here outside of the runqueue lock. (Doing it * with the lock held can cause deadlocks; see schedule() for * details.) 
*/ -static inline void finish_task_switch(task_t *prev) +static inline void finish_task_switch(runqueue_t *rq, task_t *prev) __releases(rq->lock) { - runqueue_t *rq = this_rq(); struct mm_struct *mm = rq->prev_mm; unsigned long prev_task_flags; @@ -1421,7 +1493,8 @@ static inline void finish_task_switch(task_t *prev) * Manfred Spraul */ prev_task_flags = prev->flags; - finish_arch_switch(rq, prev); + finish_arch_switch(prev); + finish_lock_switch(rq, prev); if (mm) mmdrop(mm); if (unlikely(prev_task_flags & PF_DEAD)) @@ -1435,8 +1508,12 @@ static inline void finish_task_switch(task_t *prev) asmlinkage void schedule_tail(task_t *prev) __releases(rq->lock) { - finish_task_switch(prev); - + runqueue_t *rq = this_rq(); + finish_task_switch(rq, prev); +#ifdef __ARCH_WANT_UNLOCKED_CTXSW + /* In this case, finish_task_switch does not reenable preemption */ + preempt_enable(); +#endif if (current->set_child_tid) put_user(current->pid, current->set_child_tid); } @@ -2816,11 +2893,15 @@ switch_tasks: rq->curr = next; ++*switch_count; - prepare_arch_switch(rq, next); + prepare_task_switch(rq, next); prev = context_switch(rq, prev, next); barrier(); - - finish_task_switch(prev); + /* + * this_rq must be evaluated again because prev may have moved + * CPUs since it called schedule(), thus the 'rq' on its stack + * frame will be invalid. + */ + finish_task_switch(this_rq(), prev); } else spin_unlock_irq(&rq->lock); @@ -4085,6 +4166,9 @@ void __devinit init_idle(task_t *idle, int cpu) spin_lock_irqsave(&rq->lock, flags); rq->curr = rq->idle = idle; +#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) + idle->oncpu = 1; +#endif set_tsk_need_resched(idle); spin_unlock_irqrestore(&rq->lock, flags); -- cgit v1.2.3 From 41c7ce9ad9a859871dffbe7dbc8b1f9571724e3c Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:24 -0700 Subject: [PATCH] sched: null domains Fix the last 2 places that directly access a runqueue's sched-domain and assume it cannot be NULL. That allows the use of NULL for domain, instead of a dummy domain, to signify no balancing is to happen. No functional changes. 
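[Editor's note: illustrative fragment, not part of the patch. With rq->sd allowed to be NULL, callers no longer assume a (dummy) domain is always present; instead they walk the domain tree for the flag they need and bail out quietly when nothing is attached, as the SMT wakeup hunks in the diff below do:]

	struct sched_domain *tmp, *sd = NULL;

	for_each_domain(this_cpu, tmp)
		if (tmp->flags & SD_SHARE_CPUPOWER)
			sd = tmp;

	if (!sd)
		return;	/* NULL domain attached: nothing to balance */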
Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index b1410577f9a..77c07c2928b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2579,11 +2579,15 @@ out: #ifdef CONFIG_SCHED_SMT static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) { - struct sched_domain *sd = this_rq->sd; + struct sched_domain *tmp, *sd = NULL; cpumask_t sibling_map; int i; - if (!(sd->flags & SD_SHARE_CPUPOWER)) + for_each_domain(this_cpu, tmp) + if (tmp->flags & SD_SHARE_CPUPOWER) + sd = tmp; + + if (!sd) return; /* @@ -2624,13 +2628,17 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) { - struct sched_domain *sd = this_rq->sd; + struct sched_domain *tmp, *sd = NULL; cpumask_t sibling_map; prio_array_t *array; int ret = 0, i; task_t *p; - if (!(sd->flags & SD_SHARE_CPUPOWER)) + for_each_domain(this_cpu, tmp) + if (tmp->flags & SD_SHARE_CPUPOWER) + sd = tmp; + + if (!sd) return 0; /* @@ -4617,6 +4625,11 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) { int level = 0; + if (!sd) { + printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); + return; + } + printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); do { @@ -4874,7 +4887,7 @@ static void __devinit arch_init_sched_domains(void) cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); /* - * Set up domains. Isolated domains just stay on the dummy domain. + * Set up domains. Isolated domains just stay on the NULL domain. */ for_each_cpu_mask(i, cpu_default_map) { int group; @@ -4987,18 +5000,11 @@ static void __devinit arch_destroy_sched_domains(void) #endif /* ARCH_HAS_SCHED_DOMAIN */ -/* - * Initial dummy domain for early boot and for hotplug cpu. Being static, - * it is initialized to zero, so all balancing flags are cleared which is - * what we want. - */ -static struct sched_domain sched_domain_dummy; - #ifdef CONFIG_HOTPLUG_CPU /* * Force a reinitialization of the sched domains hierarchy. The domains * and groups cannot be updated in place without racing with the balancing - * code, so we temporarily attach all running cpus to a "dummy" domain + * code, so we temporarily attach all running cpus to the NULL domain * which will prevent rebalancing while the sched domains are recalculated. */ static int update_sched_domains(struct notifier_block *nfb, @@ -5010,7 +5016,7 @@ static int update_sched_domains(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_DOWN_PREPARE: for_each_online_cpu(i) - cpu_attach_domain(&sched_domain_dummy, i); + cpu_attach_domain(NULL, i); arch_destroy_sched_domains(); return NOTIFY_OK; @@ -5072,7 +5078,7 @@ void __init sched_init(void) rq->best_expired_prio = MAX_PRIO; #ifdef CONFIG_SMP - rq->sd = &sched_domain_dummy; + rq->sd = NULL; for (j = 1; j < 3; j++) rq->cpu_load[j] = 0; rq->active_balance = 0; -- cgit v1.2.3 From 245af2c7870bd5940f7bfad19a0a03b32751fbc5 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 25 Jun 2005 14:57:25 -0700 Subject: [PATCH] sched: remove degenerate domains Remove degenerate scheduler domains during the sched-domain init. For example on x86_64, we always have NUMA configured in. On Intel EM64T systems, top most sched domain will be of NUMA and with only one sched_group in it. 
With fork/exec balances(recent Nick's fixes in -mm tree), we always endup taking wrong decisions because of this topmost domain (as it contains only one group and find_idlest_group always returns NULL). We will endup loading HT package completely first, letting active load balance kickin and correct it. In general, this patch also makes sense with out recent Nick's fixes in -mm. From: Nick Piggin Modified to account for more than just sched_groups when scanning for degenerate domains by Nick Piggin. And allow a runqueue's sd to go NULL rather than keep a single degenerate domain around (this happens when you run with maxcpus=1). Signed-off-by: Suresh Siddha Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index 77c07c2928b..e75b301b534 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4712,6 +4712,57 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) #define sched_domain_debug(sd, cpu) {} #endif +static int __devinit sd_degenerate(struct sched_domain *sd) +{ + if (cpus_weight(sd->span) == 1) + return 1; + + /* Following flags need at least 2 groups */ + if (sd->flags & (SD_LOAD_BALANCE | + SD_BALANCE_NEWIDLE | + SD_BALANCE_FORK | + SD_BALANCE_EXEC)) { + if (sd->groups != sd->groups->next) + return 0; + } + + /* Following flags don't use groups */ + if (sd->flags & (SD_WAKE_IDLE | + SD_WAKE_AFFINE | + SD_WAKE_BALANCE)) + return 0; + + return 1; +} + +static int __devinit sd_parent_degenerate(struct sched_domain *sd, + struct sched_domain *parent) +{ + unsigned long cflags = sd->flags, pflags = parent->flags; + + if (sd_degenerate(parent)) + return 1; + + if (!cpus_equal(sd->span, parent->span)) + return 0; + + /* Does parent contain flags not in child? */ + /* WAKE_BALANCE is a subset of WAKE_AFFINE */ + if (cflags & SD_WAKE_AFFINE) + pflags &= ~SD_WAKE_BALANCE; + /* Flags needing groups don't count if only 1 group in parent */ + if (parent->groups == parent->groups->next) { + pflags &= ~(SD_LOAD_BALANCE | + SD_BALANCE_NEWIDLE | + SD_BALANCE_FORK | + SD_BALANCE_EXEC); + } + if (~cflags & pflags) + return 0; + + return 1; +} + /* * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. @@ -4722,6 +4773,19 @@ void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu) unsigned long flags; runqueue_t *rq = cpu_rq(cpu); int local = 1; + struct sched_domain *tmp; + + /* Remove the sched domains which do not contribute to scheduling. */ + for (tmp = sd; tmp; tmp = tmp->parent) { + struct sched_domain *parent = tmp->parent; + if (!parent) + break; + if (sd_parent_degenerate(tmp, parent)) + tmp->parent = parent->parent; + } + + if (sd && sd_degenerate(sd)) + sd = sd->parent; sched_domain_debug(sd, cpu); -- cgit v1.2.3 From 3dbd5342074a1e570ec84edf859deb9be588006d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:26 -0700 Subject: [PATCH] sched: multilevel sbe sbf The fundamental problem that Suresh has with balance on exec and fork is that it only tries to balance the top level domain with the flag set. This was worked around by removing degenerate domains, but is still a problem if people want to start using more complex sched-domains, especially multilevel NUMA that ia64 is already using. 
This patch makes balance on fork and exec try balancing over not just the top most domain with the flag set, but all the way down the domain tree. Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index e75b301b534..ef32389ee76 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1319,21 +1319,24 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) sd = tmp; if (sd) { + cpumask_t span; int new_cpu; struct sched_group *group; +again: schedstat_inc(sd, sbf_cnt); + span = sd->span; cpu = task_cpu(p); group = find_idlest_group(sd, p, cpu); if (!group) { schedstat_inc(sd, sbf_balanced); - goto no_forkbalance; + goto nextlevel; } new_cpu = find_idlest_cpu(group, cpu); if (new_cpu == -1 || new_cpu == cpu) { schedstat_inc(sd, sbf_balanced); - goto no_forkbalance; + goto nextlevel; } if (cpu_isset(new_cpu, p->cpus_allowed)) { @@ -1343,9 +1346,21 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) rq = task_rq_lock(p, &flags); cpu = task_cpu(p); } + + /* Now try balancing at a lower domain level */ +nextlevel: + sd = NULL; + for_each_domain(cpu, tmp) { + if (cpus_subset(span, tmp->span)) + break; + if (tmp->flags & SD_BALANCE_FORK) + sd = tmp; + } + + if (sd) + goto again; } -no_forkbalance: #endif /* * We decrease the sleep average of forking parents @@ -1711,25 +1726,41 @@ void sched_exec(void) sd = tmp; if (sd) { + cpumask_t span; struct sched_group *group; +again: schedstat_inc(sd, sbe_cnt); + span = sd->span; group = find_idlest_group(sd, current, this_cpu); if (!group) { schedstat_inc(sd, sbe_balanced); - goto out; + goto nextlevel; } new_cpu = find_idlest_cpu(group, this_cpu); if (new_cpu == -1 || new_cpu == this_cpu) { schedstat_inc(sd, sbe_balanced); - goto out; + goto nextlevel; } schedstat_inc(sd, sbe_pushed); put_cpu(); sched_migrate_task(current, new_cpu); - return; + + /* Now try balancing at a lower domain level */ + this_cpu = get_cpu(); +nextlevel: + sd = NULL; + for_each_domain(this_cpu, tmp) { + if (cpus_subset(span, tmp->span)) + break; + if (tmp->flags & SD_BALANCE_EXEC) + sd = tmp; + } + + if (sd) + goto again; } -out: + put_cpu(); } -- cgit v1.2.3 From 674311d5b411e9042df4fdf7aef0b3c8217b6240 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:27 -0700 Subject: [PATCH] sched: RCU domains One of the problems with the multilevel balance-on-fork/exec is that it needs to jump through hoops to satisfy sched-domain's locking semantics (that is, you may traverse your own domain when not preemptable, and you may traverse others' domains when holding their runqueue lock). balance-on-exec had to potentially migrate between more than one CPU before finding a final CPU to migrate to, and balance-on-fork needed to potentially take multiple runqueue locks. So bite the bullet and make sched-domains go completely RCU. This actually simplifies the code quite a bit. From: Ingo Molnar schedstats RCU fix, and a nice comment on for_each_domain, from Ingo. 
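[Editor's note: illustrative fragment, not part of the patch. The locking rule that results is the standard RCU one: readers may only traverse rq->sd from within a preempt-disabled section, while the writer publishes a rebuilt tree with rcu_assign_pointer() and waits for a quiescent period before tearing the old one down, as the hunks below show.]

	/* reader side, e.g. /proc/schedstat or the balancing paths */
	preempt_disable();
	for_each_domain(cpu, sd) {
		/* ... inspect sd ... */
	}
	preempt_enable();

	/* writer side, attaching a new domain tree to a runqueue */
	rcu_assign_pointer(rq->sd, sd);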
Signed-off-by: Ingo Molnar Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 60 +++++++++++++++------------------------------------------- 1 file changed, 15 insertions(+), 45 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index ef32389ee76..54ce787b620 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -260,8 +260,15 @@ struct runqueue { static DEFINE_PER_CPU(struct runqueue, runqueues); +/* + * The domain tree (rq->sd) is protected by RCU's quiescent state transition. + * See update_sched_domains: synchronize_kernel for details. + * + * The domain tree of any CPU may only be accessed from within + * preempt-disabled sections. + */ #define for_each_domain(cpu, domain) \ - for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent) +for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) #define this_rq() (&__get_cpu_var(runqueues)) @@ -395,6 +402,7 @@ static int show_schedstat(struct seq_file *seq, void *v) #ifdef CONFIG_SMP /* domain-specific stats */ + preempt_disable(); for_each_domain(cpu, sd) { enum idle_type itype; char mask_str[NR_CPUS]; @@ -419,6 +427,7 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); } + preempt_enable(); #endif } return 0; @@ -824,22 +833,12 @@ inline int task_curr(const task_t *p) } #ifdef CONFIG_SMP -enum request_type { - REQ_MOVE_TASK, - REQ_SET_DOMAIN, -}; - typedef struct { struct list_head list; - enum request_type type; - /* For REQ_MOVE_TASK */ task_t *task; int dest_cpu; - /* For REQ_SET_DOMAIN */ - struct sched_domain *sd; - struct completion done; } migration_req_t; @@ -861,7 +860,6 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) } init_completion(&req->done); - req->type = REQ_MOVE_TASK; req->task = p; req->dest_cpu = dest_cpu; list_add(&req->list, &rq->migration_queue); @@ -4378,17 +4376,9 @@ static int migration_thread(void * data) req = list_entry(head->next, migration_req_t, list); list_del_init(head->next); - if (req->type == REQ_MOVE_TASK) { - spin_unlock(&rq->lock); - __migrate_task(req->task, cpu, req->dest_cpu); - local_irq_enable(); - } else if (req->type == REQ_SET_DOMAIN) { - rq->sd = req->sd; - spin_unlock_irq(&rq->lock); - } else { - spin_unlock_irq(&rq->lock); - WARN_ON(1); - } + spin_unlock(&rq->lock); + __migrate_task(req->task, cpu, req->dest_cpu); + local_irq_enable(); complete(&req->done); } @@ -4619,7 +4609,6 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, migration_req_t *req; req = list_entry(rq->migration_queue.next, migration_req_t, list); - BUG_ON(req->type != REQ_MOVE_TASK); list_del_init(&req->list); complete(&req->done); } @@ -4800,10 +4789,7 @@ static int __devinit sd_parent_degenerate(struct sched_domain *sd, */ void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu) { - migration_req_t req; - unsigned long flags; runqueue_t *rq = cpu_rq(cpu); - int local = 1; struct sched_domain *tmp; /* Remove the sched domains which do not contribute to scheduling. 
*/ @@ -4820,24 +4806,7 @@ void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu) sched_domain_debug(sd, cpu); - spin_lock_irqsave(&rq->lock, flags); - - if (cpu == smp_processor_id() || !cpu_online(cpu)) { - rq->sd = sd; - } else { - init_completion(&req.done); - req.type = REQ_SET_DOMAIN; - req.sd = sd; - list_add(&req.list, &rq->migration_queue); - local = 0; - } - - spin_unlock_irqrestore(&rq->lock, flags); - - if (!local) { - wake_up_process(rq->migration_thread); - wait_for_completion(&req.done); - } + rcu_assign_pointer(rq->sd, sd); } /* cpus with isolated domains */ @@ -5112,6 +5081,7 @@ static int update_sched_domains(struct notifier_block *nfb, case CPU_DOWN_PREPARE: for_each_online_cpu(i) cpu_attach_domain(NULL, i); + synchronize_kernel(); arch_destroy_sched_domains(); return NOTIFY_OK; -- cgit v1.2.3 From 476d139c218e44e045e4bc6d4cc02b010b343939 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:29 -0700 Subject: [PATCH] sched: consolidate sbe sbf Consolidate balance-on-exec with balance-on-fork. This is made easy by the sched-domains RCU patches. As well as the general goodness of code reduction, this allows the runqueues to be unlocked during balance-on-fork. schedstats is a problem. Maybe just have balance-on-event instead of distinguishing fork and exec? Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- kernel/fork.c | 21 +++--- kernel/sched.c | 174 ++++++++++++++++++++------------------------------ 3 files changed, 81 insertions(+), 116 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d27be933742..edb2c69a887 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -930,7 +930,7 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void FASTCALL(sched_fork(task_t * p)); +extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); extern void FASTCALL(sched_exit(task_t * p)); extern int in_group_p(gid_t); diff --git a/kernel/fork.c b/kernel/fork.c index a28d11e1087..2c7806873bf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1003,9 +1003,6 @@ static task_t *copy_process(unsigned long clone_flags, p->pdeath_signal = 0; p->exit_state = 0; - /* Perform scheduler related setup */ - sched_fork(p); - /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. @@ -1014,18 +1011,24 @@ static task_t *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); + /* Perform scheduler related setup. Assign this task to a CPU. */ + sched_fork(p, clone_flags); + /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* - * The task hasn't been attached yet, so cpus_allowed mask cannot - * have changed. The cpus_allowed mask of the parent may have - * changed after it was copied first time, and it may then move to - * another CPU - so we re-copy it here and set the child's CPU to - * the parent's CPU. This avoids alot of nasty races. + * The task hasn't been attached yet, so its cpus_allowed mask will + * not be changed, nor will its assigned CPU. + * + * The cpus_allowed mask of the parent may have changed after it was + * copied first time - so re-copy it here, then check the child's CPU + * to ensure it is on a valid CPU (and if not, just force it back to + * parent's CPU). This avoids alot of nasty races. 
*/ p->cpus_allowed = current->cpus_allowed; - set_task_cpu(p, smp_processor_id()); + if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed))) + set_task_cpu(p, smp_processor_id()); /* * Check for pending SIGKILL! The new thread should not be allowed diff --git a/kernel/sched.c b/kernel/sched.c index 54ce787b620..579da278e72 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1021,8 +1021,59 @@ static int find_idlest_cpu(struct sched_group *group, int this_cpu) return idlest; } +/* + * sched_balance_self: balance the current task (running on cpu) in domains + * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and + * SD_BALANCE_EXEC. + * + * Balance, ie. select the least loaded group. + * + * Returns the target CPU number, or the same CPU if no balancing is needed. + * + * preempt must be disabled. + */ +static int sched_balance_self(int cpu, int flag) +{ + struct task_struct *t = current; + struct sched_domain *tmp, *sd = NULL; -#endif + for_each_domain(cpu, tmp) + if (tmp->flags & flag) + sd = tmp; + + while (sd) { + cpumask_t span; + struct sched_group *group; + int new_cpu; + int weight; + + span = sd->span; + group = find_idlest_group(sd, t, cpu); + if (!group) + goto nextlevel; + + new_cpu = find_idlest_cpu(group, cpu); + if (new_cpu == -1 || new_cpu == cpu) + goto nextlevel; + + /* Now try balancing at a lower domain level */ + cpu = new_cpu; +nextlevel: + sd = NULL; + weight = cpus_weight(span); + for_each_domain(cpu, tmp) { + if (weight <= cpus_weight(tmp->span)) + break; + if (tmp->flags & flag) + sd = tmp; + } + /* while loop will break here if sd == NULL */ + } + + return cpu; +} + +#endif /* CONFIG_SMP */ /* * wake_idle() will wake a task on an idle cpu if task->cpu is @@ -1240,8 +1291,15 @@ int fastcall wake_up_state(task_t *p, unsigned int state) * Perform scheduler related setup for a newly forked process p. * p is forked by current. */ -void fastcall sched_fork(task_t *p) +void fastcall sched_fork(task_t *p, int clone_flags) { + int cpu = get_cpu(); + +#ifdef CONFIG_SMP + cpu = sched_balance_self(cpu, SD_BALANCE_FORK); +#endif + set_task_cpu(p, cpu); + /* * We mark the process as running here, but have not actually * inserted it onto the runqueue yet. This guarantees that @@ -1282,12 +1340,10 @@ void fastcall sched_fork(task_t *p) * runqueue lock is not a problem. 
*/ current->time_slice = 1; - preempt_disable(); scheduler_tick(); - local_irq_enable(); - preempt_enable(); - } else - local_irq_enable(); + } + local_irq_enable(); + put_cpu(); } /* @@ -1302,64 +1358,12 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags) unsigned long flags; int this_cpu, cpu; runqueue_t *rq, *this_rq; -#ifdef CONFIG_SMP - struct sched_domain *tmp, *sd = NULL; -#endif rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); cpu = task_cpu(p); -#ifdef CONFIG_SMP - for_each_domain(cpu, tmp) - if (tmp->flags & SD_BALANCE_FORK) - sd = tmp; - - if (sd) { - cpumask_t span; - int new_cpu; - struct sched_group *group; - -again: - schedstat_inc(sd, sbf_cnt); - span = sd->span; - cpu = task_cpu(p); - group = find_idlest_group(sd, p, cpu); - if (!group) { - schedstat_inc(sd, sbf_balanced); - goto nextlevel; - } - - new_cpu = find_idlest_cpu(group, cpu); - if (new_cpu == -1 || new_cpu == cpu) { - schedstat_inc(sd, sbf_balanced); - goto nextlevel; - } - - if (cpu_isset(new_cpu, p->cpus_allowed)) { - schedstat_inc(sd, sbf_pushed); - set_task_cpu(p, new_cpu); - task_rq_unlock(rq, &flags); - rq = task_rq_lock(p, &flags); - cpu = task_cpu(p); - } - - /* Now try balancing at a lower domain level */ -nextlevel: - sd = NULL; - for_each_domain(cpu, tmp) { - if (cpus_subset(span, tmp->span)) - break; - if (tmp->flags & SD_BALANCE_FORK) - sd = tmp; - } - - if (sd) - goto again; - } - -#endif /* * We decrease the sleep average of forking parents * and children as well, to keep max-interactive tasks @@ -1708,58 +1712,16 @@ out: } /* - * sched_exec(): find the highest-level, exec-balance-capable - * domain and try to migrate the task to the least loaded CPU. - * - * execve() is a valuable balancing opportunity, because at this point - * the task has the smallest effective memory and cache footprint. + * sched_exec - execve() is a valuable balancing opportunity, because at + * this point the task has the smallest effective memory and cache footprint. */ void sched_exec(void) { - struct sched_domain *tmp, *sd = NULL; int new_cpu, this_cpu = get_cpu(); - - for_each_domain(this_cpu, tmp) - if (tmp->flags & SD_BALANCE_EXEC) - sd = tmp; - - if (sd) { - cpumask_t span; - struct sched_group *group; -again: - schedstat_inc(sd, sbe_cnt); - span = sd->span; - group = find_idlest_group(sd, current, this_cpu); - if (!group) { - schedstat_inc(sd, sbe_balanced); - goto nextlevel; - } - new_cpu = find_idlest_cpu(group, this_cpu); - if (new_cpu == -1 || new_cpu == this_cpu) { - schedstat_inc(sd, sbe_balanced); - goto nextlevel; - } - - schedstat_inc(sd, sbe_pushed); - put_cpu(); - sched_migrate_task(current, new_cpu); - - /* Now try balancing at a lower domain level */ - this_cpu = get_cpu(); -nextlevel: - sd = NULL; - for_each_domain(this_cpu, tmp) { - if (cpus_subset(span, tmp->span)) - break; - if (tmp->flags & SD_BALANCE_EXEC) - sd = tmp; - } - - if (sd) - goto again; - } - + new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); put_cpu(); + if (new_cpu != this_cpu) + sched_migrate_task(current, new_cpu); } /* -- cgit v1.2.3 From 77391d71681d05d2f4502f91ad62618522abf624 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Jun 2005 14:57:30 -0700 Subject: [PATCH] sched: relax pinned balancing The maximum rebalance interval allowed by the multiprocessor balancing backoff is often not large enough to handle corner cases where there are lots of tasks pinned on a CPU. 
Suresh reported: I see system livelock's if for example I have 7000 processes pinned onto one cpu (this is on the fastest 8-way system I have access to). After this patch, the machine is reported to go well above this number. Signed-off-by: Nick Piggin Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 579da278e72..6e452eb95ac 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2030,6 +2030,12 @@ static runqueue_t *find_busiest_queue(struct sched_group *group) return busiest; } +/* + * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but + * so long as it is large enough. + */ +#define MAX_PINNED_INTERVAL 512 + /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. @@ -2042,7 +2048,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, struct sched_group *group; runqueue_t *busiest; unsigned long imbalance; - int nr_moved, all_pinned; + int nr_moved, all_pinned = 0; int active_balance = 0; spin_lock(&this_rq->lock); @@ -2133,7 +2139,8 @@ out_balanced: sd->nr_balance_failed = 0; /* tune up the balancing interval */ - if (sd->balance_interval < sd->max_interval) + if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) || + (sd->balance_interval < sd->max_interval)) sd->balance_interval *= 2; return 0; -- cgit v1.2.3 From a3464a102a69a4e00efb0a763e274ce290995b4b Mon Sep 17 00:00:00 2001 From: Chen Shang Date: Sat, 25 Jun 2005 14:57:31 -0700 Subject: [PATCH] sched: micro-optimize task requeueing in schedule() micro-optimize task requeueing in schedule() & clean up recalc_task_prio(). Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 6e452eb95ac..a3d1c8e43d3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -673,7 +673,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq) rq->nr_running++; } -static void recalc_task_prio(task_t *p, unsigned long long now) +static int recalc_task_prio(task_t *p, unsigned long long now) { /* Caller must always ensure 'now >= p->timestamp' */ unsigned long long __sleep_time = now - p->timestamp; @@ -732,7 +732,7 @@ static void recalc_task_prio(task_t *p, unsigned long long now) } } - p->prio = effective_prio(p); + return effective_prio(p); } /* @@ -755,7 +755,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) } #endif - recalc_task_prio(p, now); + p->prio = recalc_task_prio(p, now); /* * This checks to make sure it's not an uninterruptible task @@ -2751,7 +2751,7 @@ asmlinkage void __sched schedule(void) struct list_head *queue; unsigned long long now; unsigned long run_time; - int cpu, idx; + int cpu, idx, new_prio; /* * Test if we are atomic. 
Since do_exit() needs to call into @@ -2873,9 +2873,14 @@ go_idle: delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; array = next->array; - dequeue_task(next, array); - recalc_task_prio(next, next->timestamp + delta); - enqueue_task(next, array); + new_prio = recalc_task_prio(next, next->timestamp + delta); + + if (unlikely(next->prio != new_prio)) { + dequeue_task(next, array); + next->prio = new_prio; + enqueue_task(next, array); + } else + requeue_task(next, array); } next->activated = 0; switch_tasks: -- cgit v1.2.3 From 37e4ab3f0cba13adf3535d373fd98e5ee47b5410 Mon Sep 17 00:00:00 2001 From: Olivier Croquette Date: Sat, 25 Jun 2005 14:57:32 -0700 Subject: [PATCH] Changing RT priority without CAP_SYS_NICE Presently, a process without the capability CAP_SYS_NICE can not change its own policy, which is OK. But it can also not decrease its RT priority (if scheduled with policy SCHED_RR or SCHED_FIFO), which is what this patch changes. The rationale is the same as for the nice value: a process should be able to require less priority for itself. Increasing the priority is still not allowed. This is for example useful if you give a multithreaded user process a RT priority, and the process would like to organize its internal threads using priorities also. Then you can give the process the highest priority needed N, and the process starts its threads with lower priorities: N-1, N-2... The POSIX norm says that the permissions are implementation specific, so I think we can do that. In a sense, it makes the permissions consistent whatever the policy is: with this patch, process scheduled by SCHED_FIFO, SCHED_RR and SCHED_OTHER can all decrease their priority. From: Ingo Molnar cleaned up and merged to -mm. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index a3d1c8e43d3..d3d81b82e37 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3531,13 +3531,24 @@ recheck: if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) return -EINVAL; - if ((policy == SCHED_FIFO || policy == SCHED_RR) && - param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur && - !capable(CAP_SYS_NICE)) - return -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && - !capable(CAP_SYS_NICE)) - return -EPERM; + /* + * Allow unprivileged RT tasks to decrease priority: + */ + if (!capable(CAP_SYS_NICE)) { + /* can't change policy */ + if (policy != p->policy) + return -EPERM; + /* can't increase priority */ + if (policy != SCHED_NORMAL && + param->sched_priority > p->rt_priority && + param->sched_priority > + p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) + return -EPERM; + /* can't change other user's priorities */ + if ((current->euid != p->euid) && + (current->euid != p->uid)) + return -EPERM; + } retval = security_task_setscheduler(p, policy, param); if (retval) -- cgit v1.2.3 From 1a20ff27ef75d866730ee796acd811a925af762f Mon Sep 17 00:00:00 2001 From: Dinakar Guniguntala Date: Sat, 25 Jun 2005 14:57:33 -0700 Subject: [PATCH] Dynamic sched domains: sched changes The following patches add dynamic sched domains functionality that was extensively discussed on lkml and lse-tech. 
I would like to see this added to -mm o The main advantage with this feature is that it ensures that the scheduler load balacing code only balances against the cpus that are in the sched domain as defined by an exclusive cpuset and not all of the cpus in the system. This removes any overhead due to load balancing code trying to pull tasks outside of the cpu exclusive cpuset only to be prevented by the tasks' cpus_allowed mask. o cpu exclusive cpusets are useful for servers running orthogonal workloads such as RT applications requiring low latency and HPC applications that are throughput sensitive o It provides a new API partition_sched_domains in sched.c that makes dynamic sched domains possible. o cpu_exclusive cpusets sets are now associated with a sched domain. Which means that the users can dynamically modify the sched domains through the cpuset file system interface o ia64 sched domain code has been updated to support this feature as well o Currently, this does not support hotplug. (However some of my tests indicate hotplug+preempt is currently broken) o I have tested it extensively on x86. o This should have very minimal impact on performance as none of the fast paths are affected Signed-off-by: Dinakar Guniguntala Acked-by: Paul Jackson Acked-by: Nick Piggin Acked-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 + kernel/sched.c | 132 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 88 insertions(+), 46 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index edb2c69a887..98c109e4f43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -539,6 +539,8 @@ struct sched_domain { #endif }; +extern void partition_sched_domains(cpumask_t *partition1, + cpumask_t *partition2); #ifdef ARCH_HAS_SCHED_DOMAIN /* Useful helpers that arch setup code may use. Defined in kernel/sched.c */ extern cpumask_t cpu_isolated_map; diff --git a/kernel/sched.c b/kernel/sched.c index d3d81b82e37..dee96b22635 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -262,7 +262,7 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. - * See update_sched_domains: synchronize_kernel for details. + * See detach_destroy_domains: synchronize_sched for details. * * The domain tree of any CPU may only be accessed from within * preempt-disabled sections. @@ -4624,7 +4624,7 @@ int __init migration_init(void) #endif #ifdef CONFIG_SMP -#define SCHED_DOMAIN_DEBUG +#undef SCHED_DOMAIN_DEBUG #ifdef SCHED_DOMAIN_DEBUG static void sched_domain_debug(struct sched_domain *sd, int cpu) { @@ -4717,7 +4717,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) #define sched_domain_debug(sd, cpu) {} #endif -static int __devinit sd_degenerate(struct sched_domain *sd) +static int sd_degenerate(struct sched_domain *sd) { if (cpus_weight(sd->span) == 1) return 1; @@ -4740,7 +4740,7 @@ static int __devinit sd_degenerate(struct sched_domain *sd) return 1; } -static int __devinit sd_parent_degenerate(struct sched_domain *sd, +static int sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) { unsigned long cflags = sd->flags, pflags = parent->flags; @@ -4772,7 +4772,7 @@ static int __devinit sd_parent_degenerate(struct sched_domain *sd, * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. 
*/ -void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu) +void cpu_attach_domain(struct sched_domain *sd, int cpu) { runqueue_t *rq = cpu_rq(cpu); struct sched_domain *tmp; @@ -4823,7 +4823,7 @@ __setup ("isolcpus=", isolated_cpu_setup); * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ -void __devinit init_sched_build_groups(struct sched_group groups[], +void init_sched_build_groups(struct sched_group groups[], cpumask_t span, int (*group_fn)(int cpu)) { struct sched_group *first = NULL, *last = NULL; @@ -4859,13 +4859,14 @@ void __devinit init_sched_build_groups(struct sched_group groups[], #ifdef ARCH_HAS_SCHED_DOMAIN -extern void __devinit arch_init_sched_domains(void); -extern void __devinit arch_destroy_sched_domains(void); +extern void build_sched_domains(const cpumask_t *cpu_map); +extern void arch_init_sched_domains(const cpumask_t *cpu_map); +extern void arch_destroy_sched_domains(const cpumask_t *cpu_map); #else #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); static struct sched_group sched_group_cpus[NR_CPUS]; -static int __devinit cpu_to_cpu_group(int cpu) +static int cpu_to_cpu_group(int cpu) { return cpu; } @@ -4873,7 +4874,7 @@ static int __devinit cpu_to_cpu_group(int cpu) static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group sched_group_phys[NR_CPUS]; -static int __devinit cpu_to_phys_group(int cpu) +static int cpu_to_phys_group(int cpu) { #ifdef CONFIG_SCHED_SMT return first_cpu(cpu_sibling_map[cpu]); @@ -4886,7 +4887,7 @@ static int __devinit cpu_to_phys_group(int cpu) static DEFINE_PER_CPU(struct sched_domain, node_domains); static struct sched_group sched_group_nodes[MAX_NUMNODES]; -static int __devinit cpu_to_node_group(int cpu) +static int cpu_to_node_group(int cpu) { return cpu_to_node(cpu); } @@ -4917,39 +4918,28 @@ static void check_sibling_maps(void) #endif /* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. + * Build sched domains for a given set of cpus and attach the sched domains + * to the individual cpus */ -static void __devinit arch_init_sched_domains(void) +static void build_sched_domains(const cpumask_t *cpu_map) { int i; - cpumask_t cpu_default_map; - -#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) - check_sibling_maps(); -#endif - /* - * Setup mask for cpus without special case scheduling requirements. - * For now this just excludes isolated cpus, but could be used to - * exclude other special cases in the future. - */ - cpus_complement(cpu_default_map, cpu_isolated_map); - cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); /* - * Set up domains. Isolated domains just stay on the NULL domain. + * Set up domains for cpus specified by the cpu_map. 
*/ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int group; struct sched_domain *sd = NULL, *p; cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); #ifdef CONFIG_NUMA sd = &per_cpu(node_domains, i); group = cpu_to_node_group(i); *sd = SD_NODE_INIT; - sd->span = cpu_default_map; + sd->span = *cpu_map; sd->groups = &sched_group_nodes[group]; #endif @@ -4967,7 +4957,7 @@ static void __devinit arch_init_sched_domains(void) group = cpu_to_cpu_group(i); *sd = SD_SIBLING_INIT; sd->span = cpu_sibling_map[i]; - cpus_and(sd->span, sd->span, cpu_default_map); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; sd->groups = &sched_group_cpus[group]; #endif @@ -4977,7 +4967,7 @@ static void __devinit arch_init_sched_domains(void) /* Set up CPU (sibling) groups */ for_each_online_cpu(i) { cpumask_t this_sibling_map = cpu_sibling_map[i]; - cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); + cpus_and(this_sibling_map, this_sibling_map, *cpu_map); if (i != first_cpu(this_sibling_map)) continue; @@ -4990,7 +4980,7 @@ static void __devinit arch_init_sched_domains(void) for (i = 0; i < MAX_NUMNODES; i++) { cpumask_t nodemask = node_to_cpumask(i); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); if (cpus_empty(nodemask)) continue; @@ -5000,12 +4990,12 @@ static void __devinit arch_init_sched_domains(void) #ifdef CONFIG_NUMA /* Set up node groups */ - init_sched_build_groups(sched_group_nodes, cpu_default_map, + init_sched_build_groups(sched_group_nodes, *cpu_map, &cpu_to_node_group); #endif /* Calculate CPU power for physical packages and nodes */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int power; struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT @@ -5029,7 +5019,7 @@ static void __devinit arch_init_sched_domains(void) } /* Attach the domains */ - for_each_online_cpu(i) { + for_each_cpu_mask(i, *cpu_map) { struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT sd = &per_cpu(cpu_domains, i); @@ -5039,16 +5029,71 @@ static void __devinit arch_init_sched_domains(void) cpu_attach_domain(sd, i); } } +/* + * Set up scheduler domains and groups. Callers must hold the hotplug lock. + */ +static void arch_init_sched_domains(cpumask_t *cpu_map) +{ + cpumask_t cpu_default_map; -#ifdef CONFIG_HOTPLUG_CPU -static void __devinit arch_destroy_sched_domains(void) +#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) + check_sibling_maps(); +#endif + /* + * Setup mask for cpus without special case scheduling requirements. + * For now this just excludes isolated cpus, but could be used to + * exclude other special cases in the future. + */ + cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map); + + build_sched_domains(&cpu_default_map); +} + +static void arch_destroy_sched_domains(const cpumask_t *cpu_map) { /* Do nothing: everything is statically allocated. */ } -#endif #endif /* ARCH_HAS_SCHED_DOMAIN */ +/* + * Detach sched domains from a group of cpus specified in cpu_map + * These cpus will now be attached to the NULL domain + */ +static inline void detach_destroy_domains(const cpumask_t *cpu_map) +{ + int i; + + for_each_cpu_mask(i, *cpu_map) + cpu_attach_domain(NULL, i); + synchronize_sched(); + arch_destroy_sched_domains(cpu_map); +} + +/* + * Partition sched domains as specified by the cpumasks below. 
+ * This attaches all cpus from the cpumasks to the NULL domain, + * waits for a RCU quiescent period, recalculates sched + * domain information and then attaches them back to the + * correct sched domains + * Call with hotplug lock held + */ +void partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) +{ + cpumask_t change_map; + + cpus_and(*partition1, *partition1, cpu_online_map); + cpus_and(*partition2, *partition2, cpu_online_map); + cpus_or(change_map, *partition1, *partition2); + + /* Detach sched domains from all of the affected cpus */ + detach_destroy_domains(&change_map); + if (!cpus_empty(*partition1)) + build_sched_domains(partition1); + if (!cpus_empty(*partition2)) + build_sched_domains(partition2); +} + #ifdef CONFIG_HOTPLUG_CPU /* * Force a reinitialization of the sched domains hierarchy. The domains @@ -5059,15 +5104,10 @@ static void __devinit arch_destroy_sched_domains(void) static int update_sched_domains(struct notifier_block *nfb, unsigned long action, void *hcpu) { - int i; - switch (action) { case CPU_UP_PREPARE: case CPU_DOWN_PREPARE: - for_each_online_cpu(i) - cpu_attach_domain(NULL, i); - synchronize_kernel(); - arch_destroy_sched_domains(); + detach_destroy_domains(&cpu_online_map); return NOTIFY_OK; case CPU_UP_CANCELED: @@ -5083,7 +5123,7 @@ static int update_sched_domains(struct notifier_block *nfb, } /* The hotplug lock is already held by cpu_up/cpu_down */ - arch_init_sched_domains(); + arch_init_sched_domains(&cpu_online_map); return NOTIFY_OK; } @@ -5092,7 +5132,7 @@ static int update_sched_domains(struct notifier_block *nfb, void __init sched_init_smp(void) { lock_cpu_hotplug(); - arch_init_sched_domains(); + arch_init_sched_domains(&cpu_online_map); unlock_cpu_hotplug(); /* XXX: Theoretical race here - CPU may be hotplugged now */ hotcpu_notifier(update_sched_domains, 0); -- cgit v1.2.3 From 85d7b94981e2e919697bc235aad7367b33c3864b Mon Sep 17 00:00:00 2001 From: Dinakar Guniguntala Date: Sat, 25 Jun 2005 14:57:34 -0700 Subject: [PATCH] Dynamic sched domains: cpuset changes Adds the core update_cpu_domains code and updated cpusets documentation Signed-off-by: Dinakar Guniguntala Acked-by: Paul Jackson Acked-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpusets.txt | 16 +++++++++ kernel/cpuset.c | 89 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 92 insertions(+), 13 deletions(-) diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index 2f8f24eaefd..ad944c06031 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -51,6 +51,14 @@ mems_allowed vector. If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct ancestor or descendent, may share any of the same CPUs or Memory Nodes. +A cpuset that is cpu exclusive has a sched domain associated with it. +The sched domain consists of all cpus in the current cpuset that are not +part of any exclusive child cpusets. +This ensures that the scheduler load balacing code only balances +against the cpus that are in the sched domain as defined above and not +all of the cpus in the system. This removes any overhead due to +load balancing code trying to pull tasks outside of the cpu exclusive +cpuset only to be prevented by the tasks' cpus_allowed mask. 
User level code may create and destroy cpusets by name in the cpuset virtual file system, manage the attributes and permissions of these @@ -84,6 +92,9 @@ This can be especially valuable on: and a database), or * NUMA systems running large HPC applications with demanding performance characteristics. + * Also cpu_exclusive cpusets are useful for servers running orthogonal + workloads such as RT applications requiring low latency and HPC + applications that are throughput sensitive These subsets, or "soft partitions" must be able to be dynamically adjusted, as the job mix changes, without impacting other concurrently @@ -125,6 +136,8 @@ Cpusets extends these two mechanisms as follows: - A cpuset may be marked exclusive, which ensures that no other cpuset (except direct ancestors and descendents) may contain any overlapping CPUs or Memory Nodes. + Also a cpu_exclusive cpuset would be associated with a sched + domain. - You can list all the tasks (by pid) attached to any cpuset. The implementation of cpusets requires a few, simple hooks @@ -136,6 +149,9 @@ into the rest of the kernel, none in performance critical paths: allowed in that tasks cpuset. - in sched.c migrate_all_tasks(), to keep migrating tasks within the CPUs allowed by their cpuset, if possible. + - in sched.c, a new API partition_sched_domains for handling + sched domain changes associated with cpu_exclusive cpusets + and related changes in both sched.c and arch/ia64/kernel/domain.c - in the mbind and set_mempolicy system calls, to mask the requested Memory Nodes by what's allowed in that tasks cpuset. - in page_alloc, to restrict memory to allowed nodes. diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 79dd929f408..984c0bf3807 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -595,10 +595,62 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) return 0; } +/* + * For a given cpuset cur, partition the system as follows + * a. All cpus in the parent cpuset's cpus_allowed that are not part of any + * exclusive child cpusets + * b. All cpus in the current cpuset's cpus_allowed that are not part of any + * exclusive child cpusets + * Build these two partitions by calling partition_sched_domains + * + * Call with cpuset_sem held. May nest a call to the + * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. 
+ */ +static void update_cpu_domains(struct cpuset *cur) +{ + struct cpuset *c, *par = cur->parent; + cpumask_t pspan, cspan; + + if (par == NULL || cpus_empty(cur->cpus_allowed)) + return; + + /* + * Get all cpus from parent's cpus_allowed not part of exclusive + * children + */ + pspan = par->cpus_allowed; + list_for_each_entry(c, &par->children, sibling) { + if (is_cpu_exclusive(c)) + cpus_andnot(pspan, pspan, c->cpus_allowed); + } + if (is_removed(cur) || !is_cpu_exclusive(cur)) { + cpus_or(pspan, pspan, cur->cpus_allowed); + if (cpus_equal(pspan, cur->cpus_allowed)) + return; + cspan = CPU_MASK_NONE; + } else { + if (cpus_empty(pspan)) + return; + cspan = cur->cpus_allowed; + /* + * Get all cpus from current cpuset's cpus_allowed not part + * of exclusive children + */ + list_for_each_entry(c, &cur->children, sibling) { + if (is_cpu_exclusive(c)) + cpus_andnot(cspan, cspan, c->cpus_allowed); + } + } + + lock_cpu_hotplug(); + partition_sched_domains(&pspan, &cspan); + unlock_cpu_hotplug(); +} + static int update_cpumask(struct cpuset *cs, char *buf) { struct cpuset trialcs; - int retval; + int retval, cpus_unchanged; trialcs = *cs; retval = cpulist_parse(buf, trialcs.cpus_allowed); @@ -608,9 +660,13 @@ static int update_cpumask(struct cpuset *cs, char *buf) if (cpus_empty(trialcs.cpus_allowed)) return -ENOSPC; retval = validate_change(cs, &trialcs); - if (retval == 0) - cs->cpus_allowed = trialcs.cpus_allowed; - return retval; + if (retval < 0) + return retval; + cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed); + cs->cpus_allowed = trialcs.cpus_allowed; + if (is_cpu_exclusive(cs) && !cpus_unchanged) + update_cpu_domains(cs); + return 0; } static int update_nodemask(struct cpuset *cs, char *buf) @@ -646,7 +702,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) { int turning_on; struct cpuset trialcs; - int err; + int err, cpu_exclusive_changed; turning_on = (simple_strtoul(buf, NULL, 10) != 0); @@ -657,13 +713,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) clear_bit(bit, &trialcs.flags); err = validate_change(cs, &trialcs); - if (err == 0) { - if (turning_on) - set_bit(bit, &cs->flags); - else - clear_bit(bit, &cs->flags); - } - return err; + if (err < 0) + return err; + cpu_exclusive_changed = + (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs)); + if (turning_on) + set_bit(bit, &cs->flags); + else + clear_bit(bit, &cs->flags); + + if (cpu_exclusive_changed) + update_cpu_domains(cs); + return 0; } static int attach_task(struct cpuset *cs, char *buf) @@ -1309,12 +1370,14 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) up(&cpuset_sem); return -EBUSY; } - spin_lock(&cs->dentry->d_lock); parent = cs->parent; set_bit(CS_REMOVED, &cs->flags); + if (is_cpu_exclusive(cs)) + update_cpu_domains(cs); list_del(&cs->sibling); /* delete my sibling from parent->children */ if (list_empty(&parent->children)) check_for_release(parent); + spin_lock(&cs->dentry->d_lock); d = dget(cs->dentry); cs->dentry = NULL; spin_unlock(&d->d_lock); -- cgit v1.2.3 From 7f1867a5b3dc3034cbea403b229d65eed4a7f62e Mon Sep 17 00:00:00 2001 From: Dinakar Guniguntala Date: Sat, 25 Jun 2005 14:57:36 -0700 Subject: [PATCH] Dynamic sched domains: ia64 changes ia64 changes similar to kernel/sched.c. 
Signed-off-by: Dinakar Guniguntala Acked-by: Paul Jackson Acked-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/domain.c | 76 ++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c index afbde79c3b3..d65e87b6394 100644 --- a/arch/ia64/kernel/domain.c +++ b/arch/ia64/kernel/domain.c @@ -27,7 +27,7 @@ * * Should use nodemask_t. */ -static int __devinit find_next_best_node(int node, unsigned long *used_nodes) +static int find_next_best_node(int node, unsigned long *used_nodes) { int i, n, val, min_val, best_node = 0; @@ -66,7 +66,7 @@ static int __devinit find_next_best_node(int node, unsigned long *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. */ -static cpumask_t __devinit sched_domain_node_span(int node) +static cpumask_t sched_domain_node_span(int node) { int i; cpumask_t span, nodemask; @@ -96,7 +96,7 @@ static cpumask_t __devinit sched_domain_node_span(int node) #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); static struct sched_group sched_group_cpus[NR_CPUS]; -static int __devinit cpu_to_cpu_group(int cpu) +static int cpu_to_cpu_group(int cpu) { return cpu; } @@ -104,7 +104,7 @@ static int __devinit cpu_to_cpu_group(int cpu) static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group sched_group_phys[NR_CPUS]; -static int __devinit cpu_to_phys_group(int cpu) +static int cpu_to_phys_group(int cpu) { #ifdef CONFIG_SCHED_SMT return first_cpu(cpu_sibling_map[cpu]); @@ -125,44 +125,36 @@ static struct sched_group *sched_group_nodes[MAX_NUMNODES]; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); static struct sched_group sched_group_allnodes[MAX_NUMNODES]; -static int __devinit cpu_to_allnodes_group(int cpu) +static int cpu_to_allnodes_group(int cpu) { return cpu_to_node(cpu); } #endif /* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. + * Build sched domains for a given set of cpus and attach the sched domains + * to the individual cpus */ -void __devinit arch_init_sched_domains(void) +void build_sched_domains(const cpumask_t *cpu_map) { int i; - cpumask_t cpu_default_map; /* - * Setup mask for cpus without special case scheduling requirements. - * For now this just excludes isolated cpus, but could be used to - * exclude other special cases in the future. + * Set up domains for cpus specified by the cpu_map. */ - cpus_complement(cpu_default_map, cpu_isolated_map); - cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); - - /* - * Set up domains. Isolated domains just stay on the dummy domain. 
- */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int group; struct sched_domain *sd = NULL, *p; cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); #ifdef CONFIG_NUMA if (num_online_cpus() > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { sd = &per_cpu(allnodes_domains, i); *sd = SD_ALLNODES_INIT; - sd->span = cpu_default_map; + sd->span = *cpu_map; group = cpu_to_allnodes_group(i); sd->groups = &sched_group_allnodes[group]; p = sd; @@ -173,7 +165,7 @@ void __devinit arch_init_sched_domains(void) *sd = SD_NODE_INIT; sd->span = sched_domain_node_span(cpu_to_node(i)); sd->parent = p; - cpus_and(sd->span, sd->span, cpu_default_map); + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; @@ -190,7 +182,7 @@ void __devinit arch_init_sched_domains(void) group = cpu_to_cpu_group(i); *sd = SD_SIBLING_INIT; sd->span = cpu_sibling_map[i]; - cpus_and(sd->span, sd->span, cpu_default_map); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; sd->groups = &sched_group_cpus[group]; #endif @@ -198,9 +190,9 @@ void __devinit arch_init_sched_domains(void) #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { cpumask_t this_sibling_map = cpu_sibling_map[i]; - cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); + cpus_and(this_sibling_map, this_sibling_map, *cpu_map); if (i != first_cpu(this_sibling_map)) continue; @@ -213,7 +205,7 @@ void __devinit arch_init_sched_domains(void) for (i = 0; i < MAX_NUMNODES; i++) { cpumask_t nodemask = node_to_cpumask(i); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); if (cpus_empty(nodemask)) continue; @@ -222,7 +214,7 @@ void __devinit arch_init_sched_domains(void) } #ifdef CONFIG_NUMA - init_sched_build_groups(sched_group_allnodes, cpu_default_map, + init_sched_build_groups(sched_group_allnodes, *cpu_map, &cpu_to_allnodes_group); for (i = 0; i < MAX_NUMNODES; i++) { @@ -233,12 +225,12 @@ void __devinit arch_init_sched_domains(void) cpumask_t covered = CPU_MASK_NONE; int j; - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); if (cpus_empty(nodemask)) continue; domainspan = sched_domain_node_span(i); - cpus_and(domainspan, domainspan, cpu_default_map); + cpus_and(domainspan, domainspan, *cpu_map); sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); sched_group_nodes[i] = sg; @@ -266,7 +258,7 @@ void __devinit arch_init_sched_domains(void) int n = (i + j) % MAX_NUMNODES; cpus_complement(notcovered, covered); - cpus_and(tmp, notcovered, cpu_default_map); + cpus_and(tmp, notcovered, *cpu_map); cpus_and(tmp, tmp, domainspan); if (cpus_empty(tmp)) break; @@ -293,7 +285,7 @@ void __devinit arch_init_sched_domains(void) #endif /* Calculate CPU power for physical packages and nodes */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int power; struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT @@ -359,13 +351,35 @@ next_sg: cpu_attach_domain(sd, i); } } +/* + * Set up scheduler domains and groups. Callers must hold the hotplug lock. + */ +void arch_init_sched_domains(const cpumask_t *cpu_map) +{ + cpumask_t cpu_default_map; + + /* + * Setup mask for cpus without special case scheduling requirements. + * For now this just excludes isolated cpus, but could be used to + * exclude other special cases in the future. 
+ */ + cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map); + + build_sched_domains(&cpu_default_map); +} -void __devinit arch_destroy_sched_domains(void) +void arch_destroy_sched_domains(const cpumask_t *cpu_map) { #ifdef CONFIG_NUMA int i; for (i = 0; i < MAX_NUMNODES; i++) { + cpumask_t nodemask = node_to_cpumask(i); struct sched_group *oldsg, *sg = sched_group_nodes[i]; + + cpus_and(nodemask, nodemask, *cpu_map); + if (cpus_empty(nodemask)) + continue; + if (sg == NULL) continue; sg = sg->next; -- cgit v1.2.3 From cc19ca86a023fcd552c78e77a7be6ce271f92a28 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Jun 2005 14:57:36 -0700 Subject: [PATCH] consolidate PREEMPT options into kernel/Kconfig.preempt This patch consolidates the CONFIG_PREEMPT and CONFIG_PREEMPT_BKL preemption options into kernel/Kconfig.preempt. This, besides reducing source-code, also enables more centralized tweaking of preemption related options. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 23 +---------------------- arch/ppc64/Kconfig | 21 +-------------------- arch/x86_64/Kconfig | 29 ++--------------------------- kernel/Kconfig.preempt | 24 ++++++++++++++++++++++++ 4 files changed, 28 insertions(+), 69 deletions(-) create mode 100644 kernel/Kconfig.preempt diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index b4cd11e5845..961ab20032f 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -510,28 +510,7 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. - -config PREEMPT_BKL - bool "Preempt The Big Kernel Lock" - depends on PREEMPT - default y - help - This option reduces the latency of the kernel by making the - big kernel lock preemptible. - - Say Y here if you are building a kernel for a desktop system. - Say N if you are unsure. +source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index cb27068bfcd..5f40b438b58 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -270,26 +270,7 @@ config SCHED_SMT when dealing with POWER5 cpus at a cost of slightly increased overhead in some places. If unsure say N here. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. - -config PREEMPT_BKL - bool "Preempt The Big Kernel Lock" - depends on PREEMPT - default y - help - This option reduces the latency of the kernel by making the - big kernel lock preemptible. - - Say Y here if you are building a kernel for a desktop system. - Say N if you are unsure. 
+source "kernel/Kconfig.preempt" config EEH bool "PCI Extended Error Handling (EEH)" if EMBEDDED diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 92f5a526602..a853d87ff7e 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -207,33 +207,6 @@ config SMP If you don't know what to do here, say N. -config PREEMPT - bool "Preemptible Kernel" - ---help--- - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. On contrary it may also break your drivers and add - priority inheritance problems to your system. Don't select it if - you rely on a stable system or have slightly obscure hardware. - It's also not very well tested on x86-64 currently. - You have been warned. - - Say Y here if you are feeling brave and building a kernel for a - desktop, embedded or real-time system. Say N if you are unsure. - -config PREEMPT_BKL - bool "Preempt The Big Kernel Lock" - depends on PREEMPT - default y - help - This option reduces the latency of the kernel by making the - big kernel lock preemptible. - - Say Y here if you are building a kernel for a desktop system. - Say N if you are unsure. - config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP @@ -244,6 +217,8 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +source "kernel/Kconfig.preempt" + config K8_NUMA bool "K8 NUMA support" select NUMA diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt new file mode 100644 index 00000000000..587328be821 --- /dev/null +++ b/kernel/Kconfig.preempt @@ -0,0 +1,24 @@ + +config PREEMPT + bool "Preemptible Kernel" + help + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. + + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + +config PREEMPT_BKL + bool "Preempt The Big Kernel Lock" + depends on PREEMPT + default y + help + This option reduces the latency of the kernel by making the + big kernel lock preemptible. + + Say Y here if you are building a kernel for a desktop system. + Say N if you are unsure. + -- cgit v1.2.3 From f704f56af95bec3c1ca719d64d0becef74d40899 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Jun 2005 14:57:38 -0700 Subject: [PATCH] enable PREEMPT_BKL on !PREEMPT+SMP too The only sane way to clean up the current 3 lock_kernel() variants seems to be to remove the spinlock-based BKL implementations altogether, and to keep the semaphore-based one only. If we dont want to do that for whatever reason then i'm afraid we have to live with the current complexity. (but i'm open for other cleanup suggestions as well.) To explore this possibility we'll (at a minimum) have to know whether the semaphore-based BKL works fine on plain SMP too. The patch below enables this. The patch may make sense in isolation as well, as it might bring performance benefits: code that would formerly spin on the BKL spinlock will now schedule away and give up the CPU. 
It might introduce performance regressions as well, if any performance-critical code uses the BKL heavily and gets overscheduled due to the semaphore. I very much hope there is no such performance-critical codepath left though. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Kconfig.preempt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 587328be821..34c631221aa 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -13,7 +13,7 @@ config PREEMPT config PREEMPT_BKL bool "Preempt The Big Kernel Lock" - depends on PREEMPT + depends on SMP || PREEMPT default y help This option reduces the latency of the kernel by making the -- cgit v1.2.3 From f8cbd99bd3a023db8d6356d19a5f6f539d367327 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Jun 2005 14:57:39 -0700 Subject: [PATCH] sched: voluntary kernel preemption This patch adds a new preemption model: 'Voluntary Kernel Preemption'. The 3 models can be selected from a new menu: (X) No Forced Preemption (Server) ( ) Voluntary Kernel Preemption (Desktop) ( ) Preemptible Kernel (Low-Latency Desktop) we still default to the stock (Server) preemption model. Voluntary preemption works by adding a cond_resched() (reschedule-if-needed) call to every might_sleep() check. It is lighter than CONFIG_PREEMPT - at the cost of not having as tight latencies. It represents a different latency/complexity/overhead tradeoff. It has no runtime impact at all if disabled. Here are size stats that show how the various preemption models impact the kernel's size: text data bss dec hex filename 3618774 547184 179896 4345854 424ffe vmlinux.stock 3626406 547184 179896 4353486 426dce vmlinux.voluntary +0.2% 3748414 548640 179896 4476950 445016 vmlinux.preempt +3.5% voluntary-preempt is +0.2% of .text, preempt is +3.5%. This feature has been tested for many months by lots of people (and it's also included in the RHEL4 distribution and earlier variants were in Fedora as well), and it's intended for users and distributions who dont want to use full-blown CONFIG_PREEMPT for one reason or another. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 18 +++++++++++----- kernel/Kconfig.preempt | 57 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e25b97062ce..687ba8c9973 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -58,15 +58,23 @@ struct completion; * be biten later when the calling function happens to sleep when it is not * supposed to. */ +#ifdef CONFIG_PREEMPT_VOLUNTARY +extern int cond_resched(void); +# define might_resched() cond_resched() +#else +# define might_resched() do { } while (0) +#endif + #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP -#define might_sleep() __might_sleep(__FILE__, __LINE__) -#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0) -void __might_sleep(char *file, int line); + void __might_sleep(char *file, int line); +# define might_sleep() \ + do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) #else -#define might_sleep() do {} while(0) -#define might_sleep_if(cond) do {} while (0) +# define might_sleep() do { might_resched(); } while (0) #endif +#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0) + #define abs(x) ({ \ int __x = (x); \ (__x < 0) ? 
-__x : __x; \ diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 34c631221aa..0b46a5dff4c 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,15 +1,56 @@ -config PREEMPT - bool "Preemptible Kernel" +choice + prompt "Preemption Model" + default PREEMPT_NONE + +config PREEMPT_NONE + bool "No Forced Preemption (Server)" + help + This is the traditional Linux preemption model, geared towards + throughput. It will still provide good latencies most of the + time, but there are no guarantees and occasional longer delays + are possible. + + Select this option if you are building a kernel for a server or + scientific/computation system, or if you want to maximize the + raw processing power of the kernel, irrespective of scheduling + latencies. + +config PREEMPT_VOLUNTARY + bool "Voluntary Kernel Preemption (Desktop)" help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is + This option reduces the latency of the kernel by adding more + "explicit preemption points" to the kernel code. These new + preemption points have been selected to reduce the maximum + latency of rescheduling, providing faster application reactions, + at the cost of slightly lower throughput. + + This allows reaction to interactive events by allowing a + low priority process to voluntarily preempt itself even if it + is in kernel mode executing a system call. This allows + applications to run more 'smoothly' even when the system is under load. - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. + Select this if you are building a kernel for a desktop system. + +config PREEMPT + bool "Preemptible Kernel (Low-Latency Desktop)" + help + This option reduces the latency of the kernel by making + all kernel code (that is not executing in a critical section) + preemptible. This allows reaction to interactive events by + permitting a low priority process to be preempted involuntarily + even if it is in kernel mode executing a system call and would + otherwise not be about to reach a natural preemption point. + This allows applications to run more 'smoothly' even when the + system is under load, at the cost of slightly lower throughput + and a slight runtime overhead to kernel code. + + Select this if you are building a kernel for a desktop or + embedded system with latency requirements in the milliseconds + range. + +endchoice config PREEMPT_BKL bool "Preempt The Big Kernel Lock" -- cgit v1.2.3 From 8f43d03fe2c4962c11d8227ac9505e590bad758b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 25 Jun 2005 14:57:40 -0700 Subject: [PATCH] kexec: x86: rename APIC_MODE_EXINT From: "Maciej W. Rozycki" Rename APIC_MODE_EXINT to APIC_MODE_EXTINT - I think it should be named after what the mode is called in documentation. From: "Eric W. Biederman" I have reduced this patch to just the name change in the header. And integrated the changes into the patches that add those lines. Otherwise I ran into some ugly dependencies. Signed-off-by: Maciej W.
Rozycki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/apicdef.h | 2 +- include/asm-x86_64/apicdef.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-i386/apicdef.h b/include/asm-i386/apicdef.h index c689554ad5b..41e8d2d918e 100644 --- a/include/asm-i386/apicdef.h +++ b/include/asm-i386/apicdef.h @@ -90,7 +90,7 @@ #define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) #define APIC_MODE_FIXED 0x0 #define APIC_MODE_NMI 0x4 -#define APIC_MODE_EXINT 0x7 +#define APIC_MODE_EXTINT 0x7 #define APIC_LVT1 0x360 #define APIC_LVTERR 0x370 #define APIC_TMICT 0x380 diff --git a/include/asm-x86_64/apicdef.h b/include/asm-x86_64/apicdef.h index bfebdb69065..9388062c4f6 100644 --- a/include/asm-x86_64/apicdef.h +++ b/include/asm-x86_64/apicdef.h @@ -94,7 +94,7 @@ #define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) #define APIC_MODE_FIXED 0x0 #define APIC_MODE_NMI 0x4 -#define APIC_MODE_EXINT 0x7 +#define APIC_MODE_EXTINT 0x7 #define APIC_LVT1 0x360 #define APIC_LVTERR 0x370 #define APIC_TMICT 0x380 -- cgit v1.2.3 From 9635b47d910223745258768418003580ef7dba17 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 25 Jun 2005 14:57:41 -0700 Subject: [PATCH] kexec: x86: local apic fix From: "Maciej W. Rozycki" Fix a kexec problem which causes local APIC detection failure. The problem is detect_init_APIC() is called early, before the command line has been processed. Therefore "lapic" (and "nolapic") have not been seen, yet. Signed-off-by: Maciej W. Rozycki Signed-off-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 25 +++++-------------------- arch/i386/kernel/setup.c | 11 +++++++++++ include/asm-i386/apic.h | 13 +++++++++++++ 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index b905d7bb9a0..cf45bed96d0 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -40,6 +40,11 @@ #include "io_ports.h" +/* + * Knob to control our willingness to enable the local APIC. + */ +int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + /* * Debug level */ @@ -666,26 +671,6 @@ static void apic_pm_activate(void) { } * Original code written by Keir Fraser. */ -/* - * Knob to control our willingness to enable the local APIC. - */ -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ - -static int __init lapic_disable(char *str) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -__setup("nolapic", lapic_disable); - -static int __init lapic_enable(char *str) -{ - enable_local_apic = 1; - return 0; -} -__setup("lapic", lapic_enable); - static int __init apic_set_verbosity(char *str) { if (strcmp("debug", str) == 0) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index cba67e4ba0a..f1ad9fdeaad 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -44,6 +44,7 @@ #include #include #include