From bf6285278418f1dc6f07296bbb286da0bfe26d5d Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Fri, 8 Dec 2006 16:14:22 -0800
Subject: [IA64] Itanium MC Error Injection Tool: Doc and sample application

This patch contains a documention and sample application. Since the sample
application has ~1000 lines of code, it might not be suitable in a kernel
documention in kenrel tree. If you think this is not good place to hold
the sample application, please let me know and I'm open to other choices
e.g. sourceforge etc.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/ia64/err_inject.txt | 1068 +++++++++++++++++++++++++++++++++++++
 1 file changed, 1068 insertions(+)
 create mode 100644 Documentation/ia64/err_inject.txt

(limited to 'Documentation')

diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
new file mode 100644
index 00000000000..26487c172cf
--- /dev/null
+++ b/Documentation/ia64/err_inject.txt
@@ -0,0 +1,1068 @@
+
+IPF Machine Check (MC) error inject tool
+========================================
+
+IPF Machine Check (MC) error inject tool is used to inject MC
+errors from Linux. The tool is a test bed for IPF MC work flow including
+hardware correctable error handling, OS recoverable error handling, MC
+event logging, etc.
+
+The tool includes two parts: a kernel driver and a user application
+sample. The driver provides interface to PAL to inject error
+and query error injection capabilities. The driver code is in
+arch/ia64/kernel/err_inject.c. The application sample (shown below)
+provides a combination of various errors and calls the driver's interface
+(sysfs interface) to inject errors or query error injection capabilities.
+
+The tool can be used to test Intel IPF machine MC handling capabilities.
+It's especially useful for people who can not access hardware MC injection
+tool to inject error. It's also very useful to integrate with other
+software test suits to do stressful testing on IPF.
+
+Below is a sample application as part of the whole tool. The sample
+can be used as a working test tool. Or it can be expanded to include
+more features. It also can be a integrated into a libary or other user
+application to have more thorough test.
+
+The sample application takes err.conf as error configuation input. Gcc
+compiles the code. After you install err_inject driver, you can run
+this sample application to inject errors.
+
+Errata: Itanium 2 Processors Specification Update lists some errata against
+the pal_mc_error_inject PAL procedure. The following err.conf has been tested
+on latest Montecito PAL.
+
+err.conf:
+
+#This is configuration file for err_inject_tool.
+#The format of the each line is:
+#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
+#where
+#	cpu: logical cpu number the error will be inject in.
+#	loop: times the error will be injected.
+#	interval: In second. every so often one error is injected.
+#	err_type_info, err_struct_info: PAL parameters.
+#
+#Note: All values are hex w/o or w/ 0x prefix.
+
+
+#On cpu2, inject only total 0x10 errors, interval 5 seconds
+#corrected, data cache, hier-2, physical addr(assigned by tool code).
+#working on Montecito latest PAL.
+2, 10, 5, 4101, 95
+
+#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
+#corrected, data cache, hier-2, physical addr(assigned by tool code).
+#working on Montecito latest PAL.
+4, 10, 5, 4109, 95
+
+#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
+#recoverable, DTR0, hier-2.
+#working on Montecito latest PAL.
+0xf, 0x10, 5, 4249, 15
+
+The sample application source code:
+
+err_injection_tool.c:
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2006 Intel Co
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+
+#define MAX_FN_SIZE 		256
+#define MAX_BUF_SIZE 		256
+#define DATA_BUF_SIZE 		256
+#define NR_CPUS 		512
+#define MAX_TASK_NUM		2048
+#define MIN_INTERVAL		5	// seconds
+#define	ERR_DATA_BUFFER_SIZE 	3	// Three 8-byte.
+#define PARA_FIELD_NUM		5
+#define MASK_SIZE		(NR_CPUS/64)
+#define PATH_FORMAT "/sys/devices/system/node/node0/cpu%d/err_inject/"
+
+int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
+
+int verbose;
+#define vbprintf if (verbose) printf
+
+int log_info(int cpu, const char *fmt, ...)
+{
+	FILE *log;
+	char fn[MAX_FN_SIZE];
+	char buf[MAX_BUF_SIZE];
+	va_list args;
+
+	sprintf(fn, "%d.log", cpu);
+	log=fopen(fn, "a+");
+	if (log==NULL) {
+		perror("Error open:");
+		return -1;
+	}
+
+	va_start(args, fmt);
+	vprintf(fmt, args);
+	memset(buf, 0, MAX_BUF_SIZE);
+	vsprintf(buf, fmt, args);
+	va_end(args);
+
+	fwrite(buf, sizeof(buf), 1, log);
+	fclose(log);
+
+	return 0;
+}
+
+typedef unsigned long u64;
+typedef unsigned int  u32;
+
+typedef union err_type_info_u {
+	struct {
+		u64	mode		: 3,	/* 0-2 */
+			err_inj		: 3,	/* 3-5 */
+			err_sev		: 2,	/* 6-7 */
+			err_struct	: 5,	/* 8-12 */
+			struct_hier	: 3,	/* 13-15 */
+			reserved	: 48;	/* 16-63 */
+	} err_type_info_u;
+	u64	err_type_info;
+} err_type_info_t;
+
+typedef union err_struct_info_u {
+	struct {
+		u64	siv		: 1,	/* 0	 */
+			c_t		: 2,	/* 1-2	 */
+			cl_p		: 3,	/* 3-5	 */
+			cl_id		: 3,	/* 6-8	 */
+			cl_dp		: 1,	/* 9	 */
+			reserved1	: 22,	/* 10-31 */
+			tiv		: 1,	/* 32	 */
+			trigger		: 4,	/* 33-36 */
+			trigger_pl 	: 3,	/* 37-39 */
+			reserved2 	: 24;	/* 40-63 */
+	} err_struct_info_cache;
+	struct {
+		u64	siv		: 1,	/* 0	 */
+			tt		: 2,	/* 1-2	 */
+			tc_tr		: 2,	/* 3-4	 */
+			tr_slot		: 8,	/* 5-12	 */
+			reserved1	: 19,	/* 13-31 */
+			tiv		: 1,	/* 32	 */
+			trigger		: 4,	/* 33-36 */
+			trigger_pl 	: 3,	/* 37-39 */
+			reserved2 	: 24;	/* 40-63 */
+	} err_struct_info_tlb;
+	struct {
+		u64	siv		: 1,	/* 0	 */
+			regfile_id	: 4,	/* 1-4	 */
+			reg_num		: 7,	/* 5-11	 */
+			reserved1	: 20,	/* 12-31 */
+			tiv		: 1,	/* 32	 */
+			trigger		: 4,	/* 33-36 */
+			trigger_pl 	: 3,	/* 37-39 */
+			reserved2 	: 24;	/* 40-63 */
+	} err_struct_info_register;
+	struct {
+		u64	reserved;
+	} err_struct_info_bus_processor_interconnect;
+	u64	err_struct_info;
+} err_struct_info_t;
+
+typedef union err_data_buffer_u {
+	struct {
+		u64	trigger_addr;		/* 0-63		*/
+		u64	inj_addr;		/* 64-127 	*/
+		u64	way		: 5,	/* 128-132	*/
+			index		: 20,	/* 133-152	*/
+					: 39;	/* 153-191	*/
+	} err_data_buffer_cache;
+	struct {
+		u64	trigger_addr;		/* 0-63		*/
+		u64	inj_addr;		/* 64-127 	*/
+		u64	way		: 5,	/* 128-132	*/
+			index		: 20,	/* 133-152	*/
+			reserved	: 39;	/* 153-191	*/
+	} err_data_buffer_tlb;
+	struct {
+		u64	trigger_addr;		/* 0-63		*/
+	} err_data_buffer_register;
+	struct {
+		u64	reserved;		/* 0-63		*/
+	} err_data_buffer_bus_processor_interconnect;
+	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+} err_data_buffer_t;
+
+typedef union capabilities_u {
+	struct {
+		u64	i		: 1,
+			d		: 1,
+			rv		: 1,
+			tag		: 1,
+			data		: 1,
+			mesi		: 1,
+			dp		: 1,
+			reserved1	: 3,
+			pa		: 1,
+			va		: 1,
+			wi		: 1,
+			reserved2	: 20,
+			trigger		: 1,
+			trigger_pl	: 1,
+			reserved3	: 30;
+	} capabilities_cache;
+	struct {
+		u64	d		: 1,
+			i		: 1,
+			rv		: 1,
+			tc		: 1,
+			tr		: 1,
+			reserved1	: 27,
+			trigger		: 1,
+			trigger_pl	: 1,
+			reserved2	: 30;
+	} capabilities_tlb;
+	struct {
+		u64	gr_b0		: 1,
+			gr_b1		: 1,
+			fr		: 1,
+			br		: 1,
+			pr		: 1,
+			ar		: 1,
+			cr		: 1,
+			rr		: 1,
+			pkr		: 1,
+			dbr		: 1,
+			ibr		: 1,
+			pmc		: 1,
+			pmd		: 1,
+			reserved1	: 3,
+			regnum		: 1,
+			reserved2	: 15,
+			trigger		: 1,
+			trigger_pl	: 1,
+			reserved3	: 30;
+	} capabilities_register;
+	struct {
+		u64	reserved;
+	} capabilities_bus_processor_interconnect;
+} capabilities_t;
+
+typedef struct resources_s {
+	u64	ibr0		: 1,
+		ibr2		: 1,
+		ibr4		: 1,
+		ibr6		: 1,
+		dbr0		: 1,
+		dbr2		: 1,
+		dbr4		: 1,
+		dbr6		: 1,
+		reserved	: 48;
+} resources_t;
+
+
+long get_page_size(void)
+{
+	long page_size=sysconf(_SC_PAGESIZE);
+	return page_size;
+}
+
+#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
+#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
+#define SHM_VA 0x2000000100000000
+
+int shmid;
+void *shmaddr;
+
+int create_shm(void)
+{
+	key_t key;
+	char fn[MAX_FN_SIZE];
+
+	/* cpu0 is always existing */
+	sprintf(fn, PATH_FORMAT, 0);
+	if ((key = ftok(fn, 's')) == -1) {
+		perror("ftok");
+		return -1;
+	}
+
+	shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
+	if (shmid == -1) {
+		if (errno==EEXIST) {
+			shmid = shmget(key, SHM_SIZE, 0);
+			if (shmid == -1) {
+				perror("shmget");
+				return -1;
+			}
+		}
+		else {
+			perror("shmget");
+			return -1;
+		}
+	}
+	vbprintf("shmid=%d", shmid);
+
+	/* connect to the segment: */
+	shmaddr = shmat(shmid, (void *)SHM_VA, 0);
+	if (shmaddr == (void*)-1) {
+		perror("shmat");
+		return -1;
+	}
+
+	memset(shmaddr, 0, SHM_SIZE);
+	mlock(shmaddr, SHM_SIZE);
+
+	return 0;
+}
+
+int free_shm()
+{
+	munlock(shmaddr, SHM_SIZE);
+        shmdt(shmaddr);
+	semctl(shmid, 0, IPC_RMID);
+
+	return 0;
+}
+
+#ifdef _SEM_SEMUN_UNDEFINED
+union semun
+{
+	int val;
+	struct semid_ds *buf;
+	unsigned short int *array;
+	struct seminfo *__buf;
+};
+#endif
+
+u32 mode=1; /* 1: physical mode; 2: virtual mode. */
+int one_lock=1;
+key_t key[NR_CPUS];
+int semid[NR_CPUS];
+
+int create_sem(int cpu)
+{
+	union semun arg;
+	char fn[MAX_FN_SIZE];
+	int sid;
+
+	sprintf(fn, PATH_FORMAT, cpu);
+	sprintf(fn, "%s/%s", fn, "err_type_info");
+	if ((key[cpu] = ftok(fn, 'e')) == -1) {
+		perror("ftok");
+		return -1;
+	}
+
+	if (semid[cpu]!=0)
+		return 0;
+
+	/* clear old semaphore */
+	if ((sid = semget(key[cpu], 1, 0)) != -1)
+		semctl(sid, 0, IPC_RMID);
+
+	/* get one semaphore */
+	if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
+		perror("semget");
+		printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
+			(u64)key[cpu]);
+		return -1;
+	}
+
+	vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
+		(u64)key[cpu]);
+	/* initialize the semaphore to 1: */
+	arg.val = 1;
+	if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
+		perror("semctl");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int lock(int cpu)
+{
+	struct sembuf lock;
+
+	lock.sem_num = cpu;
+	lock.sem_op = 1;
+	semop(semid[cpu], &lock, 1);
+
+        return 0;
+}
+
+static int unlock(int cpu)
+{
+	struct sembuf unlock;
+
+	unlock.sem_num = cpu;
+	unlock.sem_op = -1;
+	semop(semid[cpu], &unlock, 1);
+
+        return 0;
+}
+
+void free_sem(int cpu)
+{
+	semctl(semid[cpu], 0, IPC_RMID);
+}
+
+int wr_multi(char *fn, unsigned long *data, int size)
+{
+	int fd;
+	char buf[MAX_BUF_SIZE];
+	int ret;
+
+	if (size==1)
+		sprintf(buf, "%lx", *data);
+	else if (size==3)
+		sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
+	else {
+		fprintf(stderr,"write to file with wrong size!\n");
+		return -1;
+	}
+
+	fd=open(fn, O_RDWR);
+	if (!fd) {
+		perror("Error:");
+		return -1;
+	}
+	ret=write(fd, buf, sizeof(buf));
+	close(fd);
+	return ret;
+}
+
+int wr(char *fn, unsigned long data)
+{
+	return wr_multi(fn, &data, 1);
+}
+
+int rd(char *fn, unsigned long *data)
+{
+	int fd;
+	char buf[MAX_BUF_SIZE];
+
+	fd=open(fn, O_RDONLY);
+	if (fd<0) {
+		perror("Error:");
+		return -1;
+	}
+	read(fd, buf, MAX_BUF_SIZE);
+	*data=strtoul(buf, NULL, 16);
+	close(fd);
+	return 0;
+}
+
+int rd_status(char *path, int *status)
+{
+	char fn[MAX_FN_SIZE];
+	sprintf(fn, "%s/status", path);
+	if (rd(fn, (u64*)status)<0) {
+		perror("status reading error.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int rd_capabilities(char *path, u64 *capabilities)
+{
+	char fn[MAX_FN_SIZE];
+	sprintf(fn, "%s/capabilities", path);
+	if (rd(fn, capabilities)<0) {
+		perror("capabilities reading error.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int rd_all(char *path)
+{
+	unsigned long err_type_info, err_struct_info, err_data_buffer;
+	int status;
+	unsigned long capabilities, resources;
+	char fn[MAX_FN_SIZE];
+
+	sprintf(fn, "%s/err_type_info", path);
+	if (rd(fn, &err_type_info)<0) {
+		perror("err_type_info reading error.\n");
+		return -1;
+	}
+	printf("err_type_info=%lx\n", err_type_info);
+
+	sprintf(fn, "%s/err_struct_info", path);
+	if (rd(fn, &err_struct_info)<0) {
+		perror("err_struct_info reading error.\n");
+		return -1;
+	}
+	printf("err_struct_info=%lx\n", err_struct_info);
+
+	sprintf(fn, "%s/err_data_buffer", path);
+	if (rd(fn, &err_data_buffer)<0) {
+		perror("err_data_buffer reading error.\n");
+		return -1;
+	}
+	printf("err_data_buffer=%lx\n", err_data_buffer);
+
+	sprintf(fn, "%s/status", path);
+	if (rd("status", (u64*)&status)<0) {
+		perror("status reading error.\n");
+		return -1;
+	}
+	printf("status=%d\n", status);
+
+	sprintf(fn, "%s/capabilities", path);
+	if (rd(fn,&capabilities)<0) {
+		perror("capabilities reading error.\n");
+		return -1;
+	}
+	printf("capabilities=%lx\n", capabilities);
+
+	sprintf(fn, "%s/resources", path);
+	if (rd(fn, &resources)<0) {
+		perror("resources reading error.\n");
+		return -1;
+	}
+	printf("resources=%lx\n", resources);
+
+	return 0;
+}
+
+int query_capabilities(char *path, err_type_info_t err_type_info,
+			u64 *capabilities)
+{
+	char fn[MAX_FN_SIZE];
+	err_struct_info_t err_struct_info;
+	err_data_buffer_t err_data_buffer;
+
+	err_struct_info.err_struct_info=0;
+	memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
+
+	sprintf(fn, "%s/err_type_info", path);
+	wr(fn, err_type_info.err_type_info);
+	sprintf(fn, "%s/err_struct_info", path);
+	wr(fn, 0x0);
+	sprintf(fn, "%s/err_data_buffer", path);
+	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+	// Fire pal_mc_error_inject procedure.
+	sprintf(fn, "%s/call_start", path);
+	wr(fn, mode);
+
+	if (rd_capabilities(path, capabilities)<0)
+		return -1;
+
+	return 0;
+}
+
+int query_all_capabilities()
+{
+	int status;
+	err_type_info_t err_type_info;
+	int err_sev, err_struct, struct_hier;
+	int cap=0;
+	u64 capabilities;
+	char path[MAX_FN_SIZE];
+
+	err_type_info.err_type_info=0;			// Initial
+	err_type_info.err_type_info_u.mode=0;		// Query mode;
+	err_type_info.err_type_info_u.err_inj=0;
+
+	printf("All capabilities implemented in pal_mc_error_inject:\n");
+	sprintf(path, PATH_FORMAT ,0);
+	for (err_sev=0;err_sev<3;err_sev++)
+		for (err_struct=0;err_struct<5;err_struct++)
+			for (struct_hier=0;struct_hier<5;struct_hier++)
+	{
+		status=-1;
+		capabilities=0;
+		err_type_info.err_type_info_u.err_sev=err_sev;
+		err_type_info.err_type_info_u.err_struct=err_struct;
+		err_type_info.err_type_info_u.struct_hier=struct_hier;
+
+		if (query_capabilities(path, err_type_info, &capabilities)<0)
+			continue;
+
+		if (rd_status(path, &status)<0)
+			continue;
+
+		if (status==0) {
+			cap=1;
+			printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
+				err_sev, err_struct, struct_hier);
+			printf("capabilities 0x%lx\n", capabilities);
+		}
+	}
+	if (!cap) {
+		printf("No capabilities supported.\n");
+		return 0;
+	}
+
+	return 0;
+}
+
+int err_inject(int cpu, char *path, err_type_info_t err_type_info,
+		err_struct_info_t err_struct_info,
+		err_data_buffer_t err_data_buffer)
+{
+	int status;
+	char fn[MAX_FN_SIZE];
+
+	log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
+		err_type_info.err_type_info,
+		err_struct_info.err_struct_info);
+	log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
+		err_data_buffer.err_data_buffer[0],
+		err_data_buffer.err_data_buffer[1],
+		err_data_buffer.err_data_buffer[2]);
+	sprintf(fn, "%s/err_type_info", path);
+	wr(fn, err_type_info.err_type_info);
+	sprintf(fn, "%s/err_struct_info", path);
+	wr(fn, err_struct_info.err_struct_info);
+	sprintf(fn, "%s/err_data_buffer", path);
+	wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+	// Fire pal_mc_error_inject procedure.
+	sprintf(fn, "%s/call_start", path);
+	wr(fn,mode);
+
+	if (rd_status(path, &status)<0) {
+		vbprintf("fail: read status\n");
+		return -100;
+	}
+
+	if (status!=0) {
+		log_info(cpu, "fail: status=%d\n", status);
+		return status;
+	}
+
+	return status;
+}
+
+static int construct_data_buf(char *path, err_type_info_t err_type_info,
+		err_struct_info_t err_struct_info,
+		err_data_buffer_t *err_data_buffer,
+		void *va1)
+{
+	char fn[MAX_FN_SIZE];
+	u64 virt_addr=0, phys_addr=0;
+
+	vbprintf("va1=%lx\n", (u64)va1);
+	memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
+
+	switch (err_type_info.err_type_info_u.err_struct) {
+		case 1: // Cache
+			switch (err_struct_info.err_struct_info_cache.cl_id) {
+				case 1: //Virtual addr
+					err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
+					break;
+				case 2: //Phys addr
+					sprintf(fn, "%s/virtual_to_phys", path);
+					virt_addr=(u64)va1;
+					if (wr(fn,virt_addr)<0)
+						return -1;
+					rd(fn, &phys_addr);
+					err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
+					break;
+				default:
+					printf("Not supported cl_id\n");
+					break;
+			}
+			break;
+		case 2: //  TLB
+			break;
+		case 3: //  Register file
+			break;
+		case 4: //  Bus/system interconnect
+		default:
+			printf("Not supported err_struct\n");
+			break;
+	}
+
+	return 0;
+}
+
+typedef struct {
+	u64 cpu;
+	u64 loop;
+	u64 interval;
+	u64 err_type_info;
+	u64 err_struct_info;
+	u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+} parameters_t;
+
+parameters_t line_para;
+int para;
+
+static int empty_data_buffer(u64 *err_data_buffer)
+{
+	int empty=1;
+	int i;
+
+	for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
+	   if (err_data_buffer[i]!=-1)
+		empty=0;
+
+	return empty;
+}
+
+int err_inj()
+{
+	err_type_info_t err_type_info;
+	err_struct_info_t err_struct_info;
+	err_data_buffer_t err_data_buffer;
+	int count;
+	FILE *fp;
+	unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
+	u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
+	int num;
+	int i;
+	char path[MAX_FN_SIZE];
+	parameters_t parameters[MAX_TASK_NUM]={};
+	pid_t child_pid[MAX_TASK_NUM];
+	time_t current_time;
+	int status;
+
+	if (!para) {
+	    fp=fopen("err.conf", "r");
+	    if (fp==NULL) {
+		perror("Error open err.conf");
+		return -1;
+	    }
+
+	    num=0;
+	    while (!feof(fp)) {
+		char buf[256];
+		memset(buf,0,256);
+		fgets(buf, 256, fp);
+		count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+				&cpu, &loop, &interval,&err_type_info_conf,
+				&err_struct_info_conf,
+				&err_data_buffer_conf[0],
+				&err_data_buffer_conf[1],
+				&err_data_buffer_conf[2]);
+		if (count!=PARA_FIELD_NUM+3) {
+			err_data_buffer_conf[0]=-1;
+			err_data_buffer_conf[1]=-1;
+			err_data_buffer_conf[2]=-1;
+			count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
+				&cpu, &loop, &interval,&err_type_info_conf,
+				&err_struct_info_conf);
+			if (count!=PARA_FIELD_NUM)
+				continue;
+		}
+
+		parameters[num].cpu=cpu;
+		parameters[num].loop=loop;
+		parameters[num].interval= interval>MIN_INTERVAL
+					  ?interval:MIN_INTERVAL;
+		parameters[num].err_type_info=err_type_info_conf;
+		parameters[num].err_struct_info=err_struct_info_conf;
+		memcpy(parameters[num++].err_data_buffer,
+			err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
+
+		if (num>=MAX_TASK_NUM)
+			break;
+	    }
+	}
+	else {
+		parameters[0].cpu=line_para.cpu;
+		parameters[0].loop=line_para.loop;
+		parameters[0].interval= line_para.interval>MIN_INTERVAL
+					  ?line_para.interval:MIN_INTERVAL;
+		parameters[0].err_type_info=line_para.err_type_info;
+		parameters[0].err_struct_info=line_para.err_struct_info;
+		memcpy(parameters[0].err_data_buffer,
+			line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
+
+		num=1;
+	}
+
+	/* Create semaphore: If one_lock, one semaphore for all processors.
+	   Otherwise, one sempaphore for each processor. */
+	if (one_lock) {
+		if (create_sem(0)) {
+			printf("Can not create semaphore...exit\n");
+			free_sem(0);
+			return -1;
+		}
+	}
+	else {
+		for (i=0;i<num;i++) {
+		   if (create_sem(parameters[i].cpu)) {
+			printf("Can not create semaphore for cpu%d...exit\n",i);
+			free_sem(parameters[num].cpu);
+			return -1;
+		   }
+		}
+	}
+
+	/* Create a shm segment which will be used to inject/consume errors on.*/
+	if (create_shm()==-1) {
+		printf("Error to create shm...exit\n");
+		return -1;
+	}
+
+	for (i=0;i<num;i++) {
+		pid_t pid;
+
+		current_time=time(NULL);
+		log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
+		log_info(parameters[i].cpu, "Configurations:\n");
+		log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
+			parameters[i].cpu,
+			parameters[i].loop,
+			parameters[i].interval);
+		log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
+			parameters[i].err_type_info,
+			parameters[i].err_struct_info);
+
+		sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
+		err_type_info.err_type_info=parameters[i].err_type_info;
+		err_struct_info.err_struct_info=parameters[i].err_struct_info;
+		memcpy(err_data_buffer.err_data_buffer,
+			parameters[i].err_data_buffer,
+			ERR_DATA_BUFFER_SIZE*8);
+
+		pid=fork();
+		if (pid==0) {
+			unsigned long mask[MASK_SIZE];
+			int j, k;
+
+			void *va1, *va2;
+
+			/* Allocate two memory areas va1 and va2 in shm */
+			va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
+			va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
+
+			vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
+			memset(va1, 0x1, PAGE_SIZE);
+			memset(va2, 0x2, PAGE_SIZE);
+
+			if (empty_data_buffer(err_data_buffer.err_data_buffer))
+				/* If not specified yet, construct data buffer
+				 * with va1
+				 */
+				construct_data_buf(path, err_type_info,
+					err_struct_info, &err_data_buffer,va1);
+
+			for (j=0;j<MASK_SIZE;j++)
+				mask[j]=0;
+
+			cpu=parameters[i].cpu;
+			k = cpu%64;
+			j = cpu/64;
+			mask[j]=1<<k;
+
+			if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
+				perror("Error sched_setaffinity:");
+				return -1;
+			}
+
+			for (j=0; j<parameters[i].loop; j++) {
+				log_info(parameters[i].cpu,"Injection ");
+				log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
+
+					parameters[i].cpu,j+1, parameters[i].loop);
+
+				/* Hold the lock */
+				if (one_lock)
+					lock(0);
+				else
+				/* Hold lock on this cpu */
+					lock(parameters[i].cpu);
+
+				if ((status=err_inject(parameters[i].cpu,
+					   path, err_type_info,
+					   err_struct_info, err_data_buffer))
+					   ==0) {
+					/* consume the error for "inject only"*/
+					memcpy(va2, va1, PAGE_SIZE);
+					memcpy(va1, va2, PAGE_SIZE);
+					log_info(parameters[i].cpu,
+						"successful\n");
+				}
+				else {
+					log_info(parameters[i].cpu,"fail:");
+					log_info(parameters[i].cpu,
+						"status=%d\n", status);
+					unlock(parameters[i].cpu);
+					break;
+				}
+				if (one_lock)
+				/* Release the lock */
+					unlock(0);
+				/* Release lock on this cpu */
+				else
+					unlock(parameters[i].cpu);
+
+				if (j < parameters[i].loop-1)
+					sleep(parameters[i].interval);
+			}
+			current_time=time(NULL);
+			log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
+			return 0;
+		}
+		else if (pid<0) {
+			perror("Error fork:");
+			continue;
+		}
+		child_pid[i]=pid;
+	}
+	for (i=0;i<num;i++)
+		waitpid(child_pid[i], NULL, 0);
+
+	if (one_lock)
+		free_sem(0);
+	else
+		for (i=0;i<num;i++)
+			free_sem(parameters[i].cpu);
+
+	printf("All done.\n");
+
+	return 0;
+}
+
+void help()
+{
+	printf("err_inject_tool:\n");
+	printf("\t-q: query all capabilities. default: off\n");
+	printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n");
+	printf("\t-i: inject errors. default: off\n");
+	printf("\t-l: one lock per cpu. default: one lock for all\n");
+	printf("\t-e: error parameters:\n");
+	printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n");
+	printf("\t\t   cpu: logical cpu number the error will be inject in.\n");
+	printf("\t\t   loop: times the error will be injected.\n");
+	printf("\t\t   interval: In second. every so often one error is injected.\n");
+	printf("\t\t   err_type_info, err_struct_info: PAL parameters.\n");
+	printf("\t\t   err_data_buffer: PAL parameter. Optional. If not present,\n");
+	printf("\t\t                    it's constructed by tool automatically. Be\n");
+	printf("\t\t                    careful to provide err_data_buffer and make\n");
+	printf("\t\t                    sure it's working with the environment.\n");
+	printf("\t    Note:no space between error parameters.\n");
+	printf("\t    default: Take error parameters from err.conf instead of command line.\n");
+	printf("\t-v: verbose. default: off\n");
+	printf("\t-h: help\n\n");
+	printf("The tool will take err.conf file as ");
+	printf("input to inject single or multiple errors ");
+	printf("on one or multiple cpus in parallel.\n");
+}
+
+int main(int argc, char **argv)
+{
+	char c;
+	int do_err_inj=0;
+	int do_query_all=0;
+	int count;
+	u32 m;
+
+	/* Default one lock for all cpu's */
+	one_lock=1;
+	while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
+		switch (c) {
+			case 'm':	/* Procedure mode. 1: phys 2: virt */
+				count=sscanf(optarg, "%x", &m);
+				if (count!=1 || (m!=1 && m!=2)) {
+					printf("Wrong mode number.\n");
+					help();
+					return -1;
+				}
+				mode=m;
+				break;
+			case 'i':	/* Inject errors */
+				do_err_inj=1;
+				break;
+			case 'q':	/* Query */
+				do_query_all=1;
+				break;
+			case 'v':	/* Verbose */
+				verbose=1;
+				break;
+			case 'l':	/* One lock per cpu */
+				one_lock=0;
+				break;
+			case 'e':	/* error arguments */
+				/* Take parameters:
+				 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
+				 * err_data_buffer is optional. Recommend not to specify
+				 * err_data_buffer. Better to use tool to generate it.
+				 */
+				count=sscanf(optarg,
+					"%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+					&line_para.cpu,
+					&line_para.loop,
+					&line_para.interval,
+					&line_para.err_type_info,
+					&line_para.err_struct_info,
+					&line_para.err_data_buffer[0],
+					&line_para.err_data_buffer[1],
+					&line_para.err_data_buffer[2]);
+				if (count!=PARA_FIELD_NUM+3) {
+				    line_para.err_data_buffer[0]=-1,
+				    line_para.err_data_buffer[1]=-1,
+			 	    line_para.err_data_buffer[2]=-1;
+				    count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
+						&line_para.cpu,
+						&line_para.loop,
+						&line_para.interval,
+						&line_para.err_type_info,
+						&line_para.err_struct_info);
+				    if (count!=PARA_FIELD_NUM) {
+					printf("Wrong error arguments.\n");
+					help();
+					return -1;
+				    }
+				}
+				para=1;
+				break;
+			continue;
+				break;
+			case 'h':
+				help();
+				return 0;
+			default:
+				break;
+		}
+
+	if (do_query_all)
+		query_all_capabilities();
+	if (do_err_inj)
+		err_inj();
+
+	if (!do_query_all &&  !do_err_inj)
+		help();
+
+	return 0;
+}
+
-- 
cgit v1.2.3


From e1b43bd556a611584a65f529e5077c1b54ace4f7 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 5 Feb 2007 15:47:43 -0800
Subject: [IA64] Fix example error injection program

Progam accessed using /sys/devices/system/node/node0/cpu%d/err_inject/
This path only exists for CONFIG_NUMA=y systems.  Better to use
/sys/devices/system/cpu/cpu%d/err_inject/ which is available on all
systems.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/ia64/err_inject.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
index 26487c172cf..6449a7090db 100644
--- a/Documentation/ia64/err_inject.txt
+++ b/Documentation/ia64/err_inject.txt
@@ -111,7 +111,7 @@ err_injection_tool.c:
 #define	ERR_DATA_BUFFER_SIZE 	3	// Three 8-byte.
 #define PARA_FIELD_NUM		5
 #define MASK_SIZE		(NR_CPUS/64)
-#define PATH_FORMAT "/sys/devices/system/node/node0/cpu%d/err_inject/"
+#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
 
 int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
 
-- 
cgit v1.2.3


From ddd83eff58888928115b3e225a46d3c686e64594 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 30 Mar 2007 10:39:42 -0600
Subject: [IA64] update memory attribute aliasing documentation & test cases

Updates documentation and adds some test cases.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/ia64/aliasing-test.c | 247 +++++++++++++++++++++++++++++++++++++
 Documentation/ia64/aliasing.txt    |  71 ++++++-----
 2 files changed, 284 insertions(+), 34 deletions(-)
 create mode 100644 Documentation/ia64/aliasing-test.c

(limited to 'Documentation')

diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c
new file mode 100644
index 00000000000..3153167b41c
--- /dev/null
+++ b/Documentation/ia64/aliasing-test.c
@@ -0,0 +1,247 @@
+/*
+ * Exercise /dev/mem mmap cases that have been troublesome in the past
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+int sum;
+
+int map_mem(char *path, off_t offset, size_t length, int touch)
+{
+	int fd, rc;
+	void *addr;
+	int *c;
+
+	fd = open(path, O_RDWR);
+	if (fd == -1) {
+		perror(path);
+		return -1;
+	}
+
+	addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
+	if (addr == MAP_FAILED)
+		return 1;
+
+	if (touch) {
+		c = (int *) addr;
+		while (c < (int *) (offset + length))
+			sum += *c++;
+	}
+
+	rc = munmap(addr, length);
+	if (rc == -1) {
+		perror("munmap");
+		return -1;
+	}
+
+	close(fd);
+	return 0;
+}
+
+int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch)
+{
+	struct dirent **namelist;
+	char *name, *path2;
+	int i, n, r, rc, result = 0;
+	struct stat buf;
+
+	n = scandir(path, &namelist, 0, alphasort);
+	if (n < 0) {
+		perror("scandir");
+		return -1;
+	}
+
+	for (i = 0; i < n; i++) {
+		name = namelist[i]->d_name;
+
+		if (fnmatch(".", name, 0) == 0)
+			goto skip;
+		if (fnmatch("..", name, 0) == 0)
+			goto skip;
+
+		path2 = malloc(strlen(path) + strlen(name) + 3);
+		strcpy(path2, path);
+		strcat(path2, "/");
+		strcat(path2, name);
+
+		if (fnmatch(file, name, 0) == 0) {
+			rc = map_mem(path2, offset, length, touch);
+			if (rc == 0)
+				fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
+			else if (rc > 0)
+				fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
+			else {
+				fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
+				return rc;
+			}
+		} else {
+			r = lstat(path2, &buf);
+			if (r == 0 && S_ISDIR(buf.st_mode)) {
+				rc = scan_sysfs(path2, file, offset, length, touch);
+				if (rc < 0)
+					return rc;
+			}
+		}
+
+		result |= rc;
+		free(path2);
+
+skip:
+		free(namelist[i]);
+	}
+	free(namelist);
+	return rc;
+}
+
+char buf[1024];
+
+int read_rom(char *path)
+{
+	int fd, rc;
+	size_t size = 0;
+
+	fd = open(path, O_RDWR);
+	if (fd == -1) {
+		perror(path);
+		return -1;
+	}
+
+	rc = write(fd, "1", 2);
+	if (rc <= 0) {
+		perror("write");
+		return -1;
+	}
+
+	do {
+		rc = read(fd, buf, sizeof(buf));
+		if (rc > 0)
+			size += rc;
+	} while (rc > 0);
+
+	close(fd);
+	return size;
+}
+
+int scan_rom(char *path, char *file)
+{
+	struct dirent **namelist;
+	char *name, *path2;
+	int i, n, r, rc, result = 0;
+	struct stat buf;
+
+	n = scandir(path, &namelist, 0, alphasort);
+	if (n < 0) {
+		perror("scandir");
+		return -1;
+	}
+
+	for (i = 0; i < n; i++) {
+		name = namelist[i]->d_name;
+
+		if (fnmatch(".", name, 0) == 0)
+			goto skip;
+		if (fnmatch("..", name, 0) == 0)
+			goto skip;
+
+		path2 = malloc(strlen(path) + strlen(name) + 3);
+		strcpy(path2, path);
+		strcat(path2, "/");
+		strcat(path2, name);
+
+		if (fnmatch(file, name, 0) == 0) {
+			rc = read_rom(path2);
+
+			/*
+			 * It's OK if the ROM is unreadable.  Maybe there
+			 * is no ROM, or some other error ocurred.  The
+			 * important thing is that no MCA happened.
+			 */
+			if (rc > 0)
+				fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc);
+			else {
+				fprintf(stderr, "PASS: %s not readable\n", path2);
+				return rc;
+			}
+		} else {
+			r = lstat(path2, &buf);
+			if (r == 0 && S_ISDIR(buf.st_mode)) {
+				rc = scan_rom(path2, file);
+				if (rc < 0)
+					return rc;
+			}
+		}
+
+		result |= rc;
+		free(path2);
+
+skip:
+		free(namelist[i]);
+	}
+	free(namelist);
+	return rc;
+}
+
+main()
+{
+	int rc;
+
+	if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
+		fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
+
+	/*
+	 * It's not safe to blindly read the VGA frame buffer.  If you know
+	 * how to poke the card the right way, it should respond, but it's
+	 * not safe in general.  Many machines, e.g., Intel chipsets, cover
+	 * up a non-responding card by just returning -1, but others will
+	 * report the failure as a machine check.
+	 */
+	if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
+		fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
+
+	if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
+		fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
+
+	/*
+	 * Often you can map all the individual pieces above (0-0xA0000,
+	 * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
+	 * thing at once.  This is because the individual pieces use different
+	 * attributes, and there's no single attribute supported over the
+	 * whole region.
+	 */
+	rc = map_mem("/dev/mem", 0, 1024*1024, 0);
+	if (rc == 0)
+		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
+	else if (rc > 0)
+		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
+
+	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
+	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
+	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
+	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
+
+	scan_rom("/sys/devices", "rom");
+}
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt
index 38f9a52d182..9a431a7d0f5 100644
--- a/Documentation/ia64/aliasing.txt
+++ b/Documentation/ia64/aliasing.txt
@@ -112,16 +112,6 @@ POTENTIAL ATTRIBUTE ALIASING CASES
 
 	The /dev/mem mmap constraints apply.
 
-	However, since this is for mapping legacy MMIO space, WB access
-	does not make sense.  This matters on machines without legacy
-	VGA support: these machines may have WB memory for the entire
-	first megabyte (or even the entire first granule).
-
-	On these machines, we could mmap legacy_mem as WB, which would
-	be safe in terms of attribute aliasing, but X has no way of
-	knowing that it is accessing regular memory, not a frame buffer,
-	so the kernel should fail the mmap rather than doing it with WB.
-
     read/write of /dev/mem
 
 	This uses copy_from_user(), which implicitly uses a kernel
@@ -138,14 +128,20 @@ POTENTIAL ATTRIBUTE ALIASING CASES
 
     ioremap()
 
-	This returns a kernel identity mapping for use inside the
-	kernel.
+	This returns a mapping for use inside the kernel.
 
 	If the region is in kern_memmap, we should use the attribute
-	specified there.  Otherwise, if the EFI memory map reports that
-	the entire granule supports WB, we should use that (granules
-	that are partially reserved or occupied by firmware do not appear
-	in kern_memmap).  Otherwise, we should use a UC mapping.
+	specified there.
+
+	If the EFI memory map reports that the entire granule supports
+	WB, we should use that (granules that are partially reserved
+	or occupied by firmware do not appear in kern_memmap).
+
+	If the granule contains non-WB memory, but we can cover the
+	region safely with kernel page table mappings, we can use
+	ioremap_page_range() as most other architectures do.
+
+	Failing all of the above, we have to fall back to a UC mapping.
 
 PAST PROBLEM CASES
 
@@ -158,7 +154,7 @@ PAST PROBLEM CASES
       succeed.  It may create either WB or UC user mappings, depending
       on whether the region is in kern_memmap or the EFI memory map.
 
-    mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
+    mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
 
       See https://bugzilla.novell.com/show_bug.cgi?id=140858.
 
@@ -171,28 +167,25 @@ PAST PROBLEM CASES
       so it is safe to use WB mappings.
 
       The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
-      which will use a granule-sized UC mapping covering 0-0xFFFFF.  This
-      granule covers some WB-only memory, but since UC is non-speculative,
-      the processor will never generate an uncacheable reference to the
-      WB-only areas unless the driver explicitly touches them.
+      which uses a granule-sized UC mapping.  This granule will cover some
+      WB-only memory, but since UC is non-speculative, the processor will
+      never generate an uncacheable reference to the WB-only areas unless
+      the driver explicitly touches them.
 
     mmap of 0x0-0xFFFFF legacy_mem by "X"
 
-      If the EFI memory map reports this entire range as WB, there
-      is no VGA MMIO hole, and the mmap should fail or be done with
-      a WB mapping.
+      If the EFI memory map reports that the entire range supports the
+      same attributes, we can allow the mmap (and we will prefer WB if
+      supported, as is the case with HP sx[12]000 machines with VGA
+      disabled).
 
-      There's no easy way for X to determine whether the 0xA0000-0xBFFFF
-      region is a frame buffer or just memory, so I think it's best to
-      just fail this mmap request rather than using a WB mapping.  As
-      far as I know, there's no need to map legacy_mem with WB
-      mappings.
+      If EFI reports the range as partly WB and partly UC (as on sx[12]000
+      machines with VGA enabled), we must fail the mmap because there's no
+      safe attribute to use.
 
-      Otherwise, a UC mapping of the entire region is probably safe.
-      The VGA hole means the region will not be in kern_memmap.  The
-      HP sx1000 chipset doesn't support UC access to the memory surrounding
-      the VGA hole, but X doesn't need that area anyway and should not
-      reference it.
+      If EFI reports some of the range but not all (as on Intel firmware
+      that doesn't report the VGA frame buffer at all), we should fail the
+      mmap and force the user to map just the specific region of interest.
 
     mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
 
@@ -202,6 +195,16 @@ PAST PROBLEM CASES
       This is a special case of the previous case, and the mmap should
       fail for the same reason as above.
 
+    read of /sys/devices/.../rom
+
+      For VGA devices, this may cause an ioremap() of 0xC0000.  This
+      used to be done with a UC mapping, because the VGA frame buffer
+      at 0xA0000 prevents use of a WB granule.  The UC mapping causes
+      an MCA on HP sx[12]000 chipsets.
+
+      We should use WB page table mappings to avoid covering the VGA
+      frame buffer.
+
 NOTES
 
     [1] SDM rev 2.2, vol 2, sec 4.4.1.
-- 
cgit v1.2.3


From b73c3d778647bffff6dbcbe41e8fc01215a22194 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Mon, 26 Mar 2007 21:59:33 -0800
Subject: [SCSI] Remove some unused SCSI-related kernel config variables.

Remove the unused SCSI-related kernel config variables

	SCSI_NCR53C8XX_PROFILE_SUPPORT
	SCSI_NCR53C8XX_PROFILE
	53C700_IO_MAPPED
	AIC79XX_ENABLE_RD_STRM
	AIC7XXX_PROBE_EISA_VL

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 Documentation/scsi/ncr53c8xx.txt | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt
index caf10b15518..88ef88b949f 100644
--- a/Documentation/scsi/ncr53c8xx.txt
+++ b/Documentation/scsi/ncr53c8xx.txt
@@ -562,11 +562,6 @@ if only one has a flaw for some SCSI feature, you can disable the
 support by the driver of this feature at linux start-up and enable
 this feature after boot-up only for devices that support it safely.
 
-CONFIG_SCSI_NCR53C8XX_PROFILE_SUPPORT  (default answer: n)
-    This option must be set for profiling information to be gathered 
-    and printed out through the proc file system. This features may 
-    impact performances.
-
 CONFIG_SCSI_NCR53C8XX_IOMAPPED       (default answer: n)
     Answer "y" if you suspect your mother board to not allow memory mapped I/O.
     May slow down performance a little.  This option is required by
-- 
cgit v1.2.3


From 10c9a017f1bd84a7aedaea7029cd5224863197db Mon Sep 17 00:00:00 2001
From: "Salyzyn, Mark" <mark_salyzyn@adaptec.com>
Date: Tue, 27 Mar 2007 11:51:34 -0400
Subject: [SCSI] aacraid: Add SMC and SUN products to README

Add SMC and SUN products to aacraid documentation

Signed-off-by: Mark Salyzyn <aacraid@adaptec.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 Documentation/scsi/aacraid.txt | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
index dc8e44fc650..bddc641dddb 100644
--- a/Documentation/scsi/aacraid.txt
+++ b/Documentation/scsi/aacraid.txt
@@ -37,7 +37,11 @@ Supported Cards/Chipsets
 	9005:0286:9005:029d	Adaptec	2420SA (Intruder HP release)
 	9005:0286:9005:02ac	Adaptec	1800 (Typhoon44)
 	9005:0285:9005:02b5	Adaptec	5445 (Voodoo44)
+	9005:0285:15d9:02b5	SMC	AOC-USAS-S4i
+	9005:0285:15d9:02c9	SMC	AOC-USAS-S4iB
 	9005:0285:9005:02b6	Adaptec	5805 (Voodoo80)
+	9005:0285:15d9:02b6	SMC	AOC-USAS-S8i
+	9005:0285:15d9:02ca	SMC	AOC-USAS-S8iB
 	9005:0285:9005:02b7	Adaptec	5085 (Voodoo08)
 	9005:0285:9005:02bb	Adaptec	3405 (Marauder40LP)
 	9005:0285:9005:02bc	Adaptec	3805 (Marauder80LP)
@@ -93,6 +97,9 @@ Supported Cards/Chipsets
 	9005:0286:9005:02ae		(Aurora Lite ARK)
 	9005:0285:9005:02b0		(Sunrise Lake ARK)
 	9005:0285:9005:02b1	Adaptec	(Voodoo 8 internal 8 external)
+	9005:0285:108e:7aac	SUN	STK RAID REM (Voodoo44 Coyote)
+	9005:0285:108e:0286	SUN	SG-XPCIESAS-R-IN (Cougar)
+	9005:0285:108e:0287	SUN	SG-XPCIESAS-R-EX (Prometheus)
 
 People
 -------------------------
-- 
cgit v1.2.3


From 144ff8bf0d84a81806bfb5979b0a6b176b1fcace Mon Sep 17 00:00:00 2001
From: "Salyzyn, Mark" <mark_salyzyn@adaptec.com>
Date: Wed, 4 Apr 2007 15:49:54 -0400
Subject: [SCSI] aacraid: Correct SMC products in aacraid.txt

Correct a spelling mistake for the SMC product names (replace 'B' with
'R') in the Documentation/scsi/aacraid.txt file. This is a follow-up to
a documentation patch '[PATCH] aacraid: Add SMC and SUN products to
README' submitted and accepted to scsi-misc-2.6 on March 27 2007.

Signed-off-by: Mark Salyzyn <aacraid@adaptec.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 Documentation/scsi/aacraid.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
index bddc641dddb..2368e7e4a8c 100644
--- a/Documentation/scsi/aacraid.txt
+++ b/Documentation/scsi/aacraid.txt
@@ -38,10 +38,10 @@ Supported Cards/Chipsets
 	9005:0286:9005:02ac	Adaptec	1800 (Typhoon44)
 	9005:0285:9005:02b5	Adaptec	5445 (Voodoo44)
 	9005:0285:15d9:02b5	SMC	AOC-USAS-S4i
-	9005:0285:15d9:02c9	SMC	AOC-USAS-S4iB
+	9005:0285:15d9:02c9	SMC	AOC-USAS-S4iR
 	9005:0285:9005:02b6	Adaptec	5805 (Voodoo80)
 	9005:0285:15d9:02b6	SMC	AOC-USAS-S8i
-	9005:0285:15d9:02ca	SMC	AOC-USAS-S8iB
+	9005:0285:15d9:02ca	SMC	AOC-USAS-S8iR
 	9005:0285:9005:02b7	Adaptec	5085 (Voodoo08)
 	9005:0285:9005:02bb	Adaptec	3405 (Marauder40LP)
 	9005:0285:9005:02bc	Adaptec	3805 (Marauder80LP)
-- 
cgit v1.2.3


From 7053acbd78336abf5d4bc3d8a875a03624cfb83f Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.y.miao@gmail.com>
Date: Thu, 5 Apr 2007 04:07:20 +0100
Subject: [ARM] 4304/1: removes the unnecessary bit number from CKENnn_XXXX

This patch removes the unnecessary bit number from CKENnn_XXXX
definitions for PXA, so that

	CKEN0_PWM0 --> CKEN_PWM0
	CKEN1_PWM1 --> CKEN_PWM1
	...
	CKEN24_CAMERA --> CKEN_CAMERA

The reasons for the change of these defitions are:

1. they do not scale - they are currently valid for pxa2xx, but
definitely not valid for pxa3xx, e.g., pxa3xx has bit 3 for camera
instead of bit 24

2. they are unnecessary - the peripheral name within the definition
has already announced its usage, we don't need those bit numbers
to know which peripheral we are going to enable/disable clock for

3. they are inconvenient - think about this: a driver programmer
for pxa has to remember which bit in the CKEN register to turn
on/off

Another change in the patch is to make the definitions equal to its
clock bit index, so that

   #define CKEN_CAMERA  (24)

instead of

   #define CKEN_CAMERA  (1 << 24)

this change, however, will add a run-time bit shift operation in
pxa_set_cken(), but the benefit of this change is that it scales
when bit index exceeds 32, e.g., pxa3xx has two registers CKENA
and CKENB, totally 64 bit for this, suppose CAMERA clock enabling
bit is CKENB:10, one can simply define CKEN_CAMERA to be (32 + 10)
and so that pxa_set_cken() need minimum change to adapt to that.

Signed-off-by: eric miao <eric.y.miao@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/spi/pxa2xx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
index f9717fe9bd8..215e3b8e726 100644
--- a/Documentation/spi/pxa2xx
+++ b/Documentation/spi/pxa2xx
@@ -62,7 +62,7 @@ static struct resource pxa_spi_nssp_resources[] = {
 
 static struct pxa2xx_spi_master pxa_nssp_master_info = {
 	.ssp_type = PXA25x_NSSP, /* Type of SSP */
-	.clock_enable = CKEN9_NSSP, /* NSSP Peripheral clock */
+	.clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */
 	.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
 	.enable_dma = 1, /* Enables NSSP DMA */
 };
-- 
cgit v1.2.3


From 85796e7d939a39787f10a643477298678fed85db Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@insightbb.com>
Date: Sun, 29 Apr 2007 23:42:08 -0400
Subject: Input: update some documentation

Input-programming.txt got out of sync with the latest changes in input
core; let's refresh it.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 Documentation/input/input-programming.txt | 125 +++++++++++++++++-------------
 1 file changed, 72 insertions(+), 53 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt
index 180e0689676..d9d523099bb 100644
--- a/Documentation/input/input-programming.txt
+++ b/Documentation/input/input-programming.txt
@@ -1,5 +1,3 @@
-$Id: input-programming.txt,v 1.4 2001/05/04 09:47:14 vojtech Exp $
-
 Programming input drivers
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -20,28 +18,51 @@ pressed or released a BUTTON_IRQ happens. The driver could look like:
 #include <asm/irq.h>
 #include <asm/io.h>
 
+static struct input_dev *button_dev;
+
 static void button_interrupt(int irq, void *dummy, struct pt_regs *fp)
 {
-	input_report_key(&button_dev, BTN_1, inb(BUTTON_PORT) & 1);
-	input_sync(&button_dev);
+	input_report_key(button_dev, BTN_1, inb(BUTTON_PORT) & 1);
+	input_sync(button_dev);
 }
 
 static int __init button_init(void)
 {
+	int error;
+
 	if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) {
                 printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq);
                 return -EBUSY;
         }
-	
-	button_dev.evbit[0] = BIT(EV_KEY);
-	button_dev.keybit[LONG(BTN_0)] = BIT(BTN_0);
-	
-	input_register_device(&button_dev);
+
+	button_dev = input_allocate_device();
+	if (!button_dev) {
+		printk(KERN_ERR "button.c: Not enough memory\n");
+		error = -ENOMEM;
+		goto err_free_irq;
+	}
+
+	button_dev->evbit[0] = BIT(EV_KEY);
+	button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+
+	error = input_register_device(button_dev);
+	if (error) {
+		printk(KERN_ERR "button.c: Failed to register device\n");
+		goto err_free_dev;
+	}
+
+	return 0;
+
+ err_free_dev:
+	input_free_device(button_dev);
+ err_free_irq:
+	free_irq(BUTTON_IRQ, button_interrupt);
+	return error;
 }
 
 static void __exit button_exit(void)
 {
-        input_unregister_device(&button_dev);
+        input_unregister_device(button_dev);
 	free_irq(BUTTON_IRQ, button_interrupt);
 }
 
@@ -58,17 +79,18 @@ In the _init function, which is called either upon module load or when
 booting the kernel, it grabs the required resources (it should also check
 for the presence of the device).
 
-Then it sets the input bitfields. This way the device driver tells the other
+Then it allocates a new input device structure with input_aloocate_device()
+and sets up input bitfields. This way the device driver tells the other
 parts of the input systems what it is - what events can be generated or
-accepted by this input device. Our example device can only generate EV_KEY type
-events, and from those only BTN_0 event code. Thus we only set these two
-bits. We could have used
+accepted by this input device. Our example device can only generate EV_KEY
+type events, and from those only BTN_0 event code. Thus we only set these
+two bits. We could have used
 
 	set_bit(EV_KEY, button_dev.evbit);
 	set_bit(BTN_0, button_dev.keybit);
 
 as well, but with more than single bits the first approach tends to be
-shorter. 
+shorter.
 
 Then the example driver registers the input device structure by calling
 
@@ -76,16 +98,15 @@ Then the example driver registers the input device structure by calling
 
 This adds the button_dev structure to linked lists of the input driver and
 calls device handler modules _connect functions to tell them a new input
-device has appeared. Because the _connect functions may call kmalloc(,
-GFP_KERNEL), which can sleep, input_register_device() must not be called
-from an interrupt or with a spinlock held.
+device has appeared. input_register_device() may sleep and therefore must
+not be called from an interrupt or with a spinlock held.
 
 While in use, the only used function of the driver is
 
 	button_interrupt()
 
 which upon every interrupt from the button checks its state and reports it
-via the 
+via the
 
 	input_report_key()
 
@@ -113,16 +134,10 @@ can use the open and close callback to know when it can stop polling or
 release the interrupt and when it must resume polling or grab the interrupt
 again. To do that, we would add this to our example driver:
 
-int button_used = 0;
-
 static int button_open(struct input_dev *dev)
 {
-        if (button_used++)
-                return 0;
-
 	if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) {
                 printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq);
-                button_used--;
                 return -EBUSY;
         }
 
@@ -131,20 +146,21 @@ static int button_open(struct input_dev *dev)
 
 static void button_close(struct input_dev *dev)
 {
-        if (!--button_used)
-                free_irq(IRQ_AMIGA_VERTB, button_interrupt);
+        free_irq(IRQ_AMIGA_VERTB, button_interrupt);
 }
 
 static int __init button_init(void)
 {
 	...
-	button_dev.open = button_open;
-	button_dev.close = button_close;
+	button_dev->open = button_open;
+	button_dev->close = button_close;
 	...
 }
 
-Note the button_used variable - we have to track how many times the open
-function was called to know when exactly our device stops being used.
+Note that input core keeps track of number of users for the device and
+makes sure that dev->open() is called only when the first user connects
+to the device and that dev->close() is called when the very last user
+disconnects. Calls to both callbacks are serialized.
 
 The open() callback should return a 0 in case of success or any nonzero value
 in case of failure. The close() callback (which is void) must always succeed.
@@ -175,7 +191,7 @@ set the corresponding bits and call the
 
 	input_report_rel(struct input_dev *dev, int code, int value)
 
-function. Events are generated only for nonzero value. 
+function. Events are generated only for nonzero value.
 
 However EV_ABS requires a little special care. Before calling
 input_register_device, you have to fill additional fields in the input_dev
@@ -187,6 +203,10 @@ the ABS_X axis:
 	button_dev.absfuzz[ABS_X] = 4;
 	button_dev.absflat[ABS_X] = 8;
 
+Or, you can just say:
+
+	input_set_abs_params(button_dev, ABS_X, 0, 255, 4, 8);
+
 This setting would be appropriate for a joystick X axis, with the minimum of
 0, maximum of 255 (which the joystick *must* be able to reach, no problem if
 it sometimes reports more, but it must be able to always reach the min and
@@ -197,14 +217,7 @@ If you don't need absfuzz and absflat, you can set them to zero, which mean
 that the thing is precise and always returns to exactly the center position
 (if it has any).
 
-1.4 The void *private field
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This field in the input structure can be used to point to any private data
-structures in the input device driver, in case the driver handles more than
-one device. You'll need it in the open and close callbacks.
-
-1.5 NBITS(), LONG(), BIT()
+1.4 NBITS(), LONG(), BIT()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 These three macros from input.h help some bitfield computations:
@@ -213,13 +226,9 @@ These three macros from input.h help some bitfield computations:
 	LONG(x)  - returns the index in the array in longs for bit x
 	BIT(x)   - returns the index in a long for bit x
 
-1.6 The number, id* and name fields
+1.5 The id* and name fields
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The dev->number is assigned by the input system to the input device when it
-is registered. It has no use except for identifying the device to the user
-in system messages.
-
 The dev->name should be set before registering the input device by the input
 device driver. It's a string like 'Generic button device' containing a
 user friendly name of the device.
@@ -234,15 +243,25 @@ driver.
 
 The id and name fields can be passed to userland via the evdev interface.
 
-1.7 The keycode, keycodemax, keycodesize fields
+1.6 The keycode, keycodemax, keycodesize fields
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-These two fields will be used for any input devices that report their data
-as scancodes. If not all scancodes can be known by autodetection, they may
-need to be set by userland utilities. The keycode array then is an array
-used to map from scancodes to input system keycodes. The keycode max will
-contain the size of the array and keycodesize the size of each entry in it
-(in bytes).
+These three fields should be used by input devices that have dense keymaps.
+The keycode is an array used to map from scancodes to input system keycodes.
+The keycode max should contain the size of the array and keycodesize the
+size of each entry in it (in bytes).
+
+Userspace can query and alter current scancode to keycode mappings using
+EVIOCGKEYCODE and EVIOCSKEYCODE ioctls on corresponding evdev interface.
+When a device has all 3 aforementioned fields filled in, the driver may
+rely on kernel's default implementation of setting and querying keycode
+mappings.
+
+1.7 dev->getkeycode() and dev->setkeycode()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+getkeycode() and setkeycode() callbacks allow drivers to override default
+keycode/keycodesize/keycodemax mapping mechanism provided by input core
+and implement sparse keycode maps.
 
 1.8 Key autorepeat
 ~~~~~~~~~~~~~~~~~~
@@ -266,7 +285,7 @@ direction - from the system to the input device driver. If your input device
 driver can handle these events, it has to set the respective bits in evbit,
 *and* also the callback routine:
 
-	button_dev.event = button_event;
+	button_dev->event = button_event;
 
 int button_event(struct input_dev *dev, unsigned int type, unsigned int code, int value);
 {
-- 
cgit v1.2.3


From fccb56e4d82132ac15359efc9e419371e4533437 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 1 May 2007 23:26:27 +0200
Subject: i2c: Kill i2c_adapter.class_dev

Kill i2c_adapter.class_dev. Instead, set the class of i2c_adapter.dev
to i2c_adapter_class, so that a symlink will be created for every
i2c_adapter in /sys/class/i2c-adapter.

The same change must be mirrored to i2c-isa as it duplicates some
of the i2c-core functionalities.

User-space tools and libraries might need some adjustments. In
particular, libsensors from lm_sensors 2.10.3 or later is required for
proper discovery of i2c adapter names after this change.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/feature-removal-schedule.txt | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5c88ba1ea26..c4b3bdad15d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -190,18 +190,10 @@ Who:	Jean Delvare <khali@linux-fr.org>
 
 ---------------------------
 
-What:	i2c_adapter.dev
-	i2c_adapter.list
+What:	i2c_adapter.list
 When:	July 2007
-Why:	Superfluous, given i2c_adapter.class_dev:
-	  * The "dev" was a stand-in for the physical device node that legacy
-	    drivers would not have; but now it's almost always present.  Any
-	    remaining legacy drivers must upgrade (they now trigger warnings).
-	  * The "list" duplicates class device children.
-	The delay in removing this is so upgraded lm_sensors and libsensors
-	can get deployed.  (Removal causes minor changes in the sysfs layout,
-	notably the location of the adapter type name and parenting the i2c
-	client hardware directly from their controller.)
+Why:	Superfluous, this list duplicates the one maintained by the driver
+	core.
 Who:	Jean Delvare <khali@linux-fr.org>,
 	David Brownell <dbrownell@users.sourceforge.net>
 
-- 
cgit v1.2.3


From f75803de6ae9aaebaf096d4590b40503c896eca7 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 1 May 2007 23:26:29 +0200
Subject: i2c-nforce2: Add support for the MCP61 and MCP65

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Hans-Frieder Vogt <hfvogt@gmx.net>
---
 Documentation/i2c/busses/i2c-nforce2 | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2
index 7f61fbc03f7..fae3495bcba 100644
--- a/Documentation/i2c/busses/i2c-nforce2
+++ b/Documentation/i2c/busses/i2c-nforce2
@@ -9,6 +9,8 @@ Supported adapters:
   * nForce4 MCP-04             10de:0034
   * nForce4 MCP51              10de:0264
   * nForce4 MCP55              10de:0368
+  * nForce4 MCP61              10de:03EB
+  * nForce4 MCP65              10de:0446
 
 Datasheet: not publicly available, but seems to be similar to the
            AMD-8111 SMBus 2.0 adapter.
-- 
cgit v1.2.3


From 4298cfc3eb6110df989f784be516c6340c597a66 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Tue, 1 May 2007 23:26:31 +0200
Subject: i2c: i2c probe() and remove() documented

Update Documentation/i2c to match previous patches updating probe()
and remove() logic.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/summary         | 29 +++++++++-----
 Documentation/i2c/writing-clients | 82 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 92 insertions(+), 19 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/i2c/summary b/Documentation/i2c/summary
index 41dde877679..aea60bf7e8f 100644
--- a/Documentation/i2c/summary
+++ b/Documentation/i2c/summary
@@ -4,17 +4,23 @@ I2C and SMBus
 =============
 
 I2C (pronounce: I squared C) is a protocol developed by Philips. It is a 
-slow two-wire protocol (10-400 kHz), but it suffices for many types of 
-devices.
+slow two-wire protocol (variable speed, up to 400 kHz), with a high speed
+extension (3.4 MHz).  It provides an inexpensive bus for connecting many
+types of devices with infrequent or low bandwidth communications needs.
+I2C is widely used with embedded systems.  Some systems use variants that
+don't meet branding requirements, and so are not advertised as being I2C.
 
-SMBus (System Management Bus) is a subset of the I2C protocol. Many
-modern mainboards have a System Management Bus. There are a lot of 
-devices which can be connected to a SMBus; the most notable are modern 
-memory chips with EEPROM memories and chips for hardware monitoring.
+SMBus (System Management Bus) is based on the I2C protocol, and is mostly
+a subset of I2C protocols and signaling.  Many I2C devices will work on an
+SMBus, but some SMBus protocols add semantics beyond what is required to
+achieve I2C branding.  Modern PC mainboards rely on SMBus.  The most common
+devices connected through SMBus are RAM modules configured using I2C EEPROMs,
+and hardware monitoring chips.
 
-Because the SMBus is just a special case of the generalized I2C bus, we
-can simulate the SMBus protocol on plain I2C busses. The reverse is
-regretfully impossible.
+Because the SMBus is mostly a subset of the generalized I2C bus, we can
+use its protocols on many I2C systems.  However, there are systems that don't
+meet both SMBus and I2C electrical constraints; and others which can't
+implement all the common SMBus protocol semantics or messages.
 
 
 Terminology
@@ -29,6 +35,7 @@ When we talk about I2C, we use the following terms:
 An Algorithm driver contains general code that can be used for a whole class
 of I2C adapters. Each specific adapter driver depends on one algorithm
 driver.
+
 A Driver driver (yes, this sounds ridiculous, sorry) contains the general
 code to access some type of device. Each detected device gets its own
 data in the Client structure. Usually, Driver and Client are more closely
@@ -40,6 +47,10 @@ a separate Adapter and Algorithm driver), and drivers for your I2C devices
 in this package. See the lm_sensors project http://www.lm-sensors.nu
 for device drivers.
 
+At this time, Linux only operates I2C (or SMBus) in master mode; you can't
+use these APIs to make a Linux system behave as a slave/device, either to
+speak a custom protocol or to emulate some other device.
+
 
 Included Bus Drivers
 ====================
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index fbcff96f4ca..54255fd68ec 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -1,5 +1,5 @@
 This is a small guide for those who want to write kernel drivers for I2C
-or SMBus devices.
+or SMBus devices, using Linux as the protocol host/master (not slave).
 
 To set up a driver, you need to do several things. Some are optional, and
 some things can be done slightly or completely different. Use this as a
@@ -29,8 +29,16 @@ static struct i2c_driver foo_driver = {
 	.driver = {
 		.name	= "foo",
 	},
+
+	/* iff driver uses driver model ("new style") binding model: */
+	.probe		= foo_probe,
+	.remove		= foo_remove,
+
+	/* else, driver uses "legacy" binding model: */
 	.attach_adapter	= foo_attach_adapter,
 	.detach_client	= foo_detach_client,
+
+	/* these may be used regardless of the driver binding model */
 	.shutdown	= foo_shutdown,	/* optional */
 	.suspend	= foo_suspend,	/* optional */
 	.resume		= foo_resume,	/* optional */
@@ -40,7 +48,8 @@ static struct i2c_driver foo_driver = {
 The name field is the driver name, and must not contain spaces.  It
 should match the module name (if the driver can be compiled as a module),
 although you can use MODULE_ALIAS (passing "foo" in this example) to add
-another name for the module.
+another name for the module.  If the driver name doesn't match the module
+name, the module won't be automatically loaded (hotplug/coldplug).
 
 All other fields are for call-back functions which will be explained 
 below.
@@ -141,6 +150,59 @@ Writing is done the same way.
 Probing and attaching
 =====================
 
+The Linux I2C stack was originally written to support access to hardware
+monitoring chips on PC motherboards, and thus it embeds some assumptions
+that are more appropriate to SMBus (and PCs) than to I2C.  One of these
+assumptions is that most adapters and devices drivers support the SMBUS_QUICK
+protocol to probe device presence.  Another is that devices and their drivers
+can be sufficiently configured using only such probe primitives.
+
+As Linux and its I2C stack became more widely used in embedded systems
+and complex components such as DVB adapters, those assumptions became more
+problematic.  Drivers for I2C devices that issue interrupts need more (and
+different) configuration information, as do drivers handling chip variants
+that can't be distinguished by protocol probing, or which need some board
+specific information to operate correctly.
+
+Accordingly, the I2C stack now has two models for associating I2C devices
+with their drivers:  the original "legacy" model, and a newer one that's
+fully compatible with the Linux 2.6 driver model.  These models do not mix,
+since the "legacy" model requires drivers to create "i2c_client" device
+objects after SMBus style probing, while the Linux driver model expects
+drivers to be given such device objects in their probe() routines.
+
+
+Standard Driver Model Binding ("New Style")
+-------------------------------------------
+
+System infrastructure, typically board-specific initialization code or
+boot firmware, reports what I2C devices exist.  For example, there may be
+a table, in the kernel or from the boot loader, identifying I2C devices
+and linking them to board-specific configuration information about IRQs
+and other wiring artifacts, chip type, and so on.  That could be used to
+create i2c_client objects for each I2C device.
+
+I2C device drivers using this binding model work just like any other
+kind of driver in Linux:  they provide a probe() method to bind to
+those devices, and a remove() method to unbind.
+
+	static int foo_probe(struct i2c_client *client);
+	static int foo_remove(struct i2c_client *client);
+
+Remember that the i2c_driver does not create those client handles.  The
+handle may be used during foo_probe().  If foo_probe() reports success
+(zero not a negative status code) it may save the handle and use it until
+foo_remove() returns.  That binding model is used by most Linux drivers.
+
+Drivers match devices when i2c_client.driver_name and the driver name are
+the same; this approach is used in several other busses that don't have
+device typing support in the hardware.  The driver and module name should
+match, so hotplug/coldplug mechanisms will modprobe the driver.
+
+
+Legacy Driver Binding Model
+---------------------------
+
 Most i2c devices can be present on several i2c addresses; for some this
 is determined in hardware (by soldering some chip pins to Vcc or Ground),
 for others this can be changed in software (by writing to specific client
@@ -162,8 +224,8 @@ NOTE: If you want to write a `sensors' driver, the interface is slightly
 
 
-Probing classes
----------------
+Probing classes (Legacy model)
+------------------------------
 
 All parameters are given as lists of unsigned 16-bit integers. Lists are
 terminated by I2C_CLIENT_END.
@@ -210,8 +272,8 @@ Note that you *have* to call the defined variable `normal_i2c',
 without any prefix!
 
 
-Attaching to an adapter
------------------------
+Attaching to an adapter (Legacy model)
+--------------------------------------
 
 Whenever a new adapter is inserted, or for all adapters if the driver is
 being registered, the callback attach_adapter() is called. Now is the
@@ -237,8 +299,8 @@ them (unless a `force' parameter was used). In addition, addresses that
 are already in use (by some other registered client) are skipped.
 
 
-The detect client function
---------------------------
+The detect client function (Legacy model)
+-----------------------------------------
 
 The detect client function is called by i2c_probe. The `kind' parameter
 contains -1 for a probed detection, 0 for a forced detection, or a positive
@@ -427,8 +489,8 @@ For now, you can ignore the `flags' parameter. It is there for future use.
   }
 
 
-Removing the client
-===================
+Removing the client (Legacy model)
+==================================
 
 The detach_client call back function is called when a client should be
 removed. It may actually fail, but only when panicking. This code is
-- 
cgit v1.2.3


From ce9e0794c23fb1d0222cb10009a198b427dcf6ad Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 1 May 2007 23:26:32 +0200
Subject: i2c: Document i2c_new_device()

Document the new i2c_new_device(), i2c_new_probed_device() and
i2c_unregister_device() functions.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/writing-clients | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index 54255fd68ec..e62fbfa1282 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -200,6 +200,44 @@ device typing support in the hardware.  The driver and module name should
 match, so hotplug/coldplug mechanisms will modprobe the driver.
 
 
+Device Creation (Standard driver model)
+---------------------------------------
+
+If you know for a fact that an I2C device is connected to a given I2C bus,
+you can instantiate that device by simply filling an i2c_board_info
+structure with the device address and driver name, and calling
+i2c_new_device().  This will create the device, then the driver core will
+take care of finding the right driver and will call its probe() method.
+If a driver supports different device types, you can specify the type you
+want using the type field.  You can also specify an IRQ and platform data
+if needed.
+
+Sometimes you know that a device is connected to a given I2C bus, but you
+don't know the exact address it uses.  This happens on TV adapters for
+example, where the same driver supports dozens of slightly different
+models, and I2C device addresses change from one model to the next.  In
+that case, you can use the i2c_new_probed_device() variant, which is
+similar to i2c_new_device(), except that it takes an additional list of
+possible I2C addresses to probe.  A device is created for the first
+responsive address in the list.  If you expect more than one device to be
+present in the address range, simply call i2c_new_probed_device() that
+many times.
+
+The call to i2c_new_device() or i2c_new_probed_device() typically happens
+in the I2C bus driver. You may want to save the returned i2c_client
+reference for later use.
+
+
+Device Deletion (Standard driver model)
+---------------------------------------
+
+Each I2C device which has been created using i2c_new_device() or
+i2c_new_probed_device() can be unregistered by calling
+i2c_unregister_device().  If you don't call it explicitly, it will be
+called automatically before the underlying I2C bus itself is removed, as a
+device can't survive its parent in the device driver model.
+
+
 Legacy Driver Binding Model
 ---------------------------
 
-- 
cgit v1.2.3


From b3e820968ad47219f7d559117a30e85cf96b4e4e Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 1 May 2007 23:26:32 +0200
Subject: i2c: Make i2c_del_driver a void function

Make i2c_del_driver a void function, like all other driver removal
functions. It always returned 0 even when errors occured, and nobody
ever actually checked the return value anyway. And we cannot fail
a module removal anyway.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/writing-clients | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index e62fbfa1282..f7e04ec849b 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -586,10 +586,7 @@ the driver module is usually enough.
   void foo_cleanup(void)
   {
     if (foo_initialized == 1) {
-      if ((res = i2c_del_driver(&foo_driver))) {
-        printk("foo: Driver registration failed, module not removed.\n");
-        return;
-      }
+      i2c_del_driver(&foo_driver);
       foo_initialized --;
     }
   }
-- 
cgit v1.2.3


From 11de70bd4d40a1a39c1133b260bfbd6306e981d3 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 1 May 2007 23:26:34 +0200
Subject: i2c: Obsolete i2c-ixp2000, i2c-ixp4xx and scx200_i2c

The new generic i2c-gpio driver should be used instead.
The obsolete drivers will be removed in September 2007.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Deepak Saxena <dsaxena@plexity.net>
Cc: Jordan Crouse <jordan.crouse@amd.com>
---
 Documentation/feature-removal-schedule.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index c4b3bdad15d..547663bdae8 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -306,3 +306,11 @@ Why:	Code was merged, then submitter immediately disappeared leaving
 Who:	David S. Miller <davem@davemloft.net>
 
 ---------------------------
+
+What:	i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers
+When:	September 2007
+Why:	Obsolete. The new i2c-gpio driver replaces all hardware-specific
+	I2C-over-GPIO drivers.
+Who:	Jean Delvare <khali@linux-fr.org>
+
+---------------------------
-- 
cgit v1.2.3


From eefcd75e72f382270f8f64e030550b10e3882b2b Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 1 May 2007 23:26:35 +0200
Subject: i2c: Documentation update

Make the documentation on how to write and port i2c drivers more in
line with the current state of things:
* i2c-isa is deprecated and soon gone, so stop advertising it.
* Drop many sensors-specific references. Most of them were outdated
  anyway.
* Update the example code to reflect the recent and not-so-recent
  API and coding style preference changes.
* Simplify the example init and cleanup functions.

This should make things less complex to understand for newcomers.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/porting-clients |  18 ++-
 Documentation/i2c/writing-clients | 292 ++++----------------------------------
 2 files changed, 39 insertions(+), 271 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients
index ca272b263a9..7bf82c08f6c 100644
--- a/Documentation/i2c/porting-clients
+++ b/Documentation/i2c/porting-clients
@@ -1,4 +1,4 @@
-Revision 6, 2005-11-20
+Revision 7, 2007-04-19
 Jean Delvare <khali@linux-fr.org>
 Greg KH <greg@kroah.com>
 
@@ -20,6 +20,10 @@ yours for best results.
 
 Technical changes:
 
+* [Driver type] Any driver that was relying on i2c-isa has to be
+  converted to a proper isa, platform or pci driver. This is not
+  covered by this guide.
+
 * [Includes] Get rid of "version.h" and <linux/i2c-proc.h>.
   Includes typically look like that:
   #include <linux/module.h>
@@ -27,12 +31,10 @@ Technical changes:
   #include <linux/slab.h>
   #include <linux/jiffies.h>
   #include <linux/i2c.h>
-  #include <linux/i2c-isa.h>	/* for ISA drivers */
   #include <linux/hwmon.h>	/* for hardware monitoring drivers */
   #include <linux/hwmon-sysfs.h>
   #include <linux/hwmon-vid.h>	/* if you need VRM support */
   #include <linux/err.h>	/* for class registration */
-  #include <asm/io.h>		/* if you have I/O operations */
   Please respect this inclusion order. Some extra headers may be
   required for a given driver (e.g. "lm75.h").
 
@@ -69,20 +71,16 @@ Technical changes:
   sensors mailing list <lm-sensors@lm-sensors.org> by providing a
   patch to the Documentation/hwmon/sysfs-interface file.
 
-* [Attach] For I2C drivers, the attach function should make sure
-  that the adapter's class has I2C_CLASS_HWMON (or whatever class is
-  suitable for your driver), using the following construct:
+* [Attach] The attach function should make sure that the adapter's
+  class has I2C_CLASS_HWMON (or whatever class is suitable for your
+  driver), using the following construct:
   if (!(adapter->class & I2C_CLASS_HWMON))
           return 0;
-  ISA-only drivers of course don't need this.
   Call i2c_probe() instead of i2c_detect().
 
 * [Detect] As mentioned earlier, the flags parameter is gone.
   The type_name and client_name strings are replaced by a single
   name string, which will be filled with a lowercase, short string.
-  In i2c-only drivers, drop the i2c_is_isa_adapter check, it's
-  useless. Same for isa-only drivers, as the test would always be
-  true. Only hybrid drivers (which are quite rare) still need it.
   The labels used for error paths are reduced to the number needed.
   It is advised that the labels are given descriptive names such as
   exit and exit_free. Don't forget to properly set err before
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index f7e04ec849b..3d8d36b0ad1 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -74,16 +74,13 @@ An example structure is below.
 
   struct foo_data {
     struct i2c_client client;
-    struct semaphore lock; /* For ISA access in `sensors' drivers. */
-    int sysctl_id;         /* To keep the /proc directory entry for 
-                              `sensors' drivers. */
     enum chips type;       /* To keep the chips type for `sensors' drivers. */
    
     /* Because the i2c bus is slow, it is often useful to cache the read
        information of a chip for some time (for example, 1 or 2 seconds).
        It depends of course on the device whether this is really worthwhile
        or even sensible. */
-    struct semaphore update_lock; /* When we are reading lots of information,
+    struct mutex update_lock;     /* When we are reading lots of information,
                                      another process should not update the
                                      below information */
     char valid;                   /* != 0 if the following fields are valid. */
@@ -104,8 +101,7 @@ some obscure clients). But we need generic reading and writing routines.
 I have found it useful to define foo_read and foo_write function for this.
 For some cases, it will be easier to call the i2c functions directly,
 but many chips have some kind of register-value idea that can easily
-be encapsulated. Also, some chips have both ISA and I2C interfaces, and
-it useful to abstract from this (only for `sensors' drivers).
+be encapsulated.
 
 The below functions are simple examples, and should not be copied
 literally.
@@ -128,24 +124,6 @@ literally.
       return i2c_smbus_write_word_data(client,reg,value);
   }
 
-For sensors code, you may have to cope with ISA registers too. Something
-like the below often works. Note the locking! 
-
-  int foo_read_value(struct i2c_client *client, u8 reg)
-  {
-    int res;
-    if (i2c_is_isa_client(client)) {
-      down(&(((struct foo_data *) (client->data)) -> lock));
-      outb_p(reg,client->addr + FOO_ADDR_REG_OFFSET);
-      res = inb_p(client->addr + FOO_DATA_REG_OFFSET);
-      up(&(((struct foo_data *) (client->data)) -> lock));
-      return res;
-    } else
-      return i2c_smbus_read_byte_data(client,reg);
-  }
-
-Writing is done the same way.
-
 
 Probing and attaching
 =====================
@@ -257,10 +235,6 @@ detection algorithm.
 You do not have to use this parameter interface; but don't try to use
 function i2c_probe() if you don't.
 
-NOTE: If you want to write a `sensors' driver, the interface is slightly
-      different! See below.
-
-
 
 Probing classes (Legacy model)
 ------------------------------
@@ -344,10 +318,6 @@ The detect client function is called by i2c_probe. The `kind' parameter
 contains -1 for a probed detection, 0 for a forced detection, or a positive
 number for a forced detection with a chip type forced.
 
-Below, some things are only needed if this is a `sensors' driver. Those
-parts are between /* SENSORS ONLY START */ and /* SENSORS ONLY END */
-markers. 
-
 Returning an error different from -ENODEV in a detect function will cause
 the detection to stop: other addresses and adapters won't be scanned.
 This should only be done on fatal or internal errors, such as a memory
@@ -356,64 +326,20 @@ shortage or i2c_attach_client failing.
 For now, you can ignore the `flags' parameter. It is there for future use.
 
   int foo_detect_client(struct i2c_adapter *adapter, int address, 
-                        unsigned short flags, int kind)
+                        int kind)
   {
     int err = 0;
     int i;
-    struct i2c_client *new_client;
+    struct i2c_client *client;
     struct foo_data *data;
-    const char *client_name = ""; /* For non-`sensors' drivers, put the real
-                                     name here! */
+    const char *name = "";
    
     /* Let's see whether this adapter can support what we need.
-       Please substitute the things you need here! 
-       For `sensors' drivers, add `! is_isa &&' to the if statement */
+       Please substitute the things you need here! */
     if (!i2c_check_functionality(adapter,I2C_FUNC_SMBUS_WORD_DATA |
                                         I2C_FUNC_SMBUS_WRITE_BYTE))
        goto ERROR0;
 
-    /* SENSORS ONLY START */
-    const char *type_name = "";
-    int is_isa = i2c_is_isa_adapter(adapter);
-
-    /* Do this only if the chip can additionally be found on the ISA bus
-       (hybrid chip). */
-
-    if (is_isa) {
-
-      /* Discard immediately if this ISA range is already used */
-      /* FIXME: never use check_region(), only request_region() */
-      if (check_region(address,FOO_EXTENT))
-        goto ERROR0;
-
-      /* Probe whether there is anything on this address.
-         Some example code is below, but you will have to adapt this
-         for your own driver */
-
-      if (kind < 0) /* Only if no force parameter was used */ {
-        /* We may need long timeouts at least for some chips. */
-        #define REALLY_SLOW_IO
-        i = inb_p(address + 1);
-        if (inb_p(address + 2) != i)
-          goto ERROR0;
-        if (inb_p(address + 3) != i)
-          goto ERROR0;
-        if (inb_p(address + 7) != i)
-          goto ERROR0;
-        #undef REALLY_SLOW_IO
-
-        /* Let's just hope nothing breaks here */
-        i = inb_p(address + 5) & 0x7f;
-        outb_p(~i & 0x7f,address+5);
-        if ((inb_p(address + 5) & 0x7f) != (~i & 0x7f)) {
-          outb_p(i,address+5);
-          return 0;
-        }
-      }
-    }
-
-    /* SENSORS ONLY END */
-
     /* OK. For now, we presume we have a valid client. We now create the
        client structure, even though we cannot fill it completely yet.
        But it allows us to access several i2c functions safely */
@@ -423,13 +349,12 @@ For now, you can ignore the `flags' parameter. It is there for future use.
       goto ERROR0;
     }
 
-    new_client = &data->client;
-    i2c_set_clientdata(new_client, data);
+    client = &data->client;
+    i2c_set_clientdata(client, data);
 
-    new_client->addr = address;
-    new_client->adapter = adapter;
-    new_client->driver = &foo_driver;
-    new_client->flags = 0;
+    client->addr = address;
+    client->adapter = adapter;
+    client->driver = &foo_driver;
 
     /* Now, we do the remaining detection. If no `force' parameter is used. */
 
@@ -437,19 +362,17 @@ For now, you can ignore the `flags' parameter. It is there for future use.
        parameter was used. */
     if (kind < 0) {
       /* The below is of course bogus */
-      if (foo_read(new_client,FOO_REG_GENERIC) != FOO_GENERIC_VALUE)
+      if (foo_read(client, FOO_REG_GENERIC) != FOO_GENERIC_VALUE)
          goto ERROR1;
     }
 
-    /* SENSORS ONLY START */
-
     /* Next, specific detection. This is especially important for `sensors'
        devices. */
 
     /* Determine the chip type. Not needed if a `force_CHIPTYPE' parameter
        was used. */
     if (kind <= 0) {
-      i = foo_read(new_client,FOO_REG_CHIPTYPE);
+      i = foo_read(client, FOO_REG_CHIPTYPE);
       if (i == FOO_TYPE_1) 
         kind = chip1; /* As defined in the enum */
       else if (i == FOO_TYPE_2)
@@ -463,63 +386,31 @@ For now, you can ignore the `flags' parameter. It is there for future use.
 
     /* Now set the type and chip names */
     if (kind == chip1) {
-      type_name = "chip1"; /* For /proc entry */
-      client_name = "CHIP 1";
+      name = "chip1";
     } else if (kind == chip2) {
-      type_name = "chip2"; /* For /proc entry */
-      client_name = "CHIP 2";
+      name = "chip2";
     }
    
-    /* Reserve the ISA region */
-    if (is_isa)
-      request_region(address,FOO_EXTENT,type_name);
-
-    /* SENSORS ONLY END */
-
     /* Fill in the remaining client fields. */
-    strcpy(new_client->name,client_name);
-
-    /* SENSORS ONLY BEGIN */
+    strlcpy(client->name, name, I2C_NAME_SIZE);
     data->type = kind;
-    /* SENSORS ONLY END */
-
-    data->valid = 0; /* Only if you use this field */
-    init_MUTEX(&data->update_lock); /* Only if you use this field */
+    mutex_init(&data->update_lock); /* Only if you use this field */
 
     /* Any other initializations in data must be done here too. */
 
-    /* Tell the i2c layer a new client has arrived */
-    if ((err = i2c_attach_client(new_client)))
-      goto ERROR3;
-
-    /* SENSORS ONLY BEGIN */
-    /* Register a new directory entry with module sensors. See below for
-       the `template' structure. */
-    if ((i = i2c_register_entry(new_client, type_name,
-                                    foo_dir_table_template,THIS_MODULE)) < 0) {
-      err = i;
-      goto ERROR4;
-    }
-    data->sysctl_id = i;
-
-    /* SENSORS ONLY END */
-
     /* This function can write default values to the client registers, if
        needed. */
-    foo_init_client(new_client);
+    foo_init_client(client);
+
+    /* Tell the i2c layer a new client has arrived */
+    if ((err = i2c_attach_client(client)))
+      goto ERROR1;
+
     return 0;
 
     /* OK, this is not exactly good programming practice, usually. But it is
        very code-efficient in this case. */
 
-    ERROR4:
-      i2c_detach_client(new_client);
-    ERROR3:
-    ERROR2:
-    /* SENSORS ONLY START */
-      if (is_isa)
-        release_region(address,FOO_EXTENT);
-    /* SENSORS ONLY END */
     ERROR1:
       kfree(data);
     ERROR0:
@@ -536,22 +427,12 @@ much simpler than the attachment code, fortunately!
 
   int foo_detach_client(struct i2c_client *client)
   {
-    int err,i;
-
-    /* SENSORS ONLY START */
-    /* Deregister with the `i2c-proc' module. */
-    i2c_deregister_entry(((struct lm78_data *)(client->data))->sysctl_id);
-    /* SENSORS ONLY END */
+    int err;
 
     /* Try to detach the client from i2c space */
     if ((err = i2c_detach_client(client)))
       return err;
 
-    /* HYBRID SENSORS CHIP ONLY START */
-    if i2c_is_isa_client(client)
-      release_region(client->addr,LM78_EXTENT);
-    /* HYBRID SENSORS CHIP ONLY END */
-
     kfree(i2c_get_clientdata(client));
     return 0;
   }
@@ -564,42 +445,34 @@ When the kernel is booted, or when your foo driver module is inserted,
 you have to do some initializing. Fortunately, just attaching (registering)
 the driver module is usually enough.
 
-  /* Keep track of how far we got in the initialization process. If several
-     things have to initialized, and we fail halfway, only those things
-     have to be cleaned up! */
-  static int __initdata foo_initialized = 0;
-
   static int __init foo_init(void)
   {
     int res;
-    printk("foo version %s (%s)\n",FOO_VERSION,FOO_DATE);
     
     if ((res = i2c_add_driver(&foo_driver))) {
       printk("foo: Driver registration failed, module not inserted.\n");
-      foo_cleanup();
       return res;
     }
-    foo_initialized ++;
     return 0;
   }
 
-  void foo_cleanup(void)
+  static void __exit foo_cleanup(void)
   {
-    if (foo_initialized == 1) {
-      i2c_del_driver(&foo_driver);
-      foo_initialized --;
-    }
+    i2c_del_driver(&foo_driver);
   }
 
   /* Substitute your own name and email address */
   MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>"
   MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices");
 
+  /* a few non-GPL license types are also allowed */
+  MODULE_LICENSE("GPL");
+
   module_init(foo_init);
   module_exit(foo_cleanup);
 
 Note that some functions are marked by `__init', and some data structures
-by `__init_data'.  Hose functions and structures can be removed after
+by `__initdata'.  These functions and structures can be removed after
 kernel booting (or module loading) is completed.
 
 
@@ -729,110 +602,7 @@ General purpose routines
 Below all general purpose routines are listed, that were not mentioned
 before.
 
-  /* This call returns a unique low identifier for each registered adapter,
-   * or -1 if the adapter was not registered.
+  /* This call returns a unique low identifier for each registered adapter.
    */
   extern int i2c_adapter_id(struct i2c_adapter *adap);
 
-
-The sensors sysctl/proc interface
-=================================
-
-This section only applies if you write `sensors' drivers.
-
-Each sensors driver creates a directory in /proc/sys/dev/sensors for each
-registered client. The directory is called something like foo-i2c-4-65.
-The sensors module helps you to do this as easily as possible.
-
-The template
-------------
-
-You will need to define a ctl_table template. This template will automatically
-be copied to a newly allocated structure and filled in where necessary when
-you call sensors_register_entry.
-
-First, I will give an example definition.
-  static ctl_table foo_dir_table_template[] = {
-    { FOO_SYSCTL_FUNC1, "func1", NULL, 0, 0644, NULL, &i2c_proc_real,
-      &i2c_sysctl_real,NULL,&foo_func },
-    { FOO_SYSCTL_FUNC2, "func2", NULL, 0, 0644, NULL, &i2c_proc_real,
-      &i2c_sysctl_real,NULL,&foo_func },
-    { FOO_SYSCTL_DATA, "data", NULL, 0, 0644, NULL, &i2c_proc_real,
-      &i2c_sysctl_real,NULL,&foo_data },
-    { 0 }
-  };
-
-In the above example, three entries are defined. They can either be
-accessed through the /proc interface, in the /proc/sys/dev/sensors/*
-directories, as files named func1, func2 and data, or alternatively 
-through the sysctl interface, in the appropriate table, with identifiers
-FOO_SYSCTL_FUNC1, FOO_SYSCTL_FUNC2 and FOO_SYSCTL_DATA.
-
-The third, sixth and ninth parameters should always be NULL, and the
-fourth should always be 0. The fifth is the mode of the /proc file;
-0644 is safe, as the file will be owned by root:root. 
-
-The seventh and eighth parameters should be &i2c_proc_real and
-&i2c_sysctl_real if you want to export lists of reals (scaled
-integers). You can also use your own function for them, as usual.
-Finally, the last parameter is the call-back to gather the data
-(see below) if you use the *_proc_real functions. 
-
-
-Gathering the data
-------------------
-
-The call back functions (foo_func and foo_data in the above example)
-can be called in several ways; the operation parameter determines
-what should be done:
-
-  * If operation == SENSORS_PROC_REAL_INFO, you must return the
-    magnitude (scaling) in nrels_mag;
-  * If operation == SENSORS_PROC_REAL_READ, you must read information
-    from the chip and return it in results. The number of integers
-    to display should be put in nrels_mag;
-  * If operation == SENSORS_PROC_REAL_WRITE, you must write the
-    supplied information to the chip. nrels_mag will contain the number
-    of integers, results the integers themselves.
-
-The *_proc_real functions will display the elements as reals for the
-/proc interface. If you set the magnitude to 2, and supply 345 for
-SENSORS_PROC_REAL_READ, it would display 3.45; and if the user would
-write 45.6 to the /proc file, it would be returned as 4560 for
-SENSORS_PROC_REAL_WRITE. A magnitude may even be negative!
-
-An example function:
-
-  /* FOO_FROM_REG and FOO_TO_REG translate between scaled values and
-     register values. Note the use of the read cache. */
-  void foo_in(struct i2c_client *client, int operation, int ctl_name, 
-              int *nrels_mag, long *results)
-  {
-    struct foo_data *data = client->data;
-    int nr = ctl_name - FOO_SYSCTL_FUNC1; /* reduce to 0 upwards */
-    
-    if (operation == SENSORS_PROC_REAL_INFO)
-      *nrels_mag = 2;
-    else if (operation == SENSORS_PROC_REAL_READ) {
-      /* Update the readings cache (if necessary) */
-      foo_update_client(client);
-      /* Get the readings from the cache */
-      results[0] = FOO_FROM_REG(data->foo_func_base[nr]);
-      results[1] = FOO_FROM_REG(data->foo_func_more[nr]);
-      results[2] = FOO_FROM_REG(data->foo_func_readonly[nr]);
-      *nrels_mag = 2;
-    } else if (operation == SENSORS_PROC_REAL_WRITE) {
-      if (*nrels_mag >= 1) {
-        /* Update the cache */
-        data->foo_base[nr] = FOO_TO_REG(results[0]);
-        /* Update the chip */
-        foo_write_value(client,FOO_REG_FUNC_BASE(nr),data->foo_base[nr]);
-      }
-      if (*nrels_mag >= 2) {
-        /* Update the cache */
-        data->foo_more[nr] = FOO_TO_REG(results[1]);
-        /* Update the chip */
-        foo_write_value(client,FOO_REG_FUNC_MORE(nr),data->foo_more[nr]);
-      }
-    }
-  }
-- 
cgit v1.2.3


From 8b8ca80e192b10eecc01fc44a2902510af86f73b Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 2 May 2007 19:27:09 +0200
Subject: [PATCH] x86-64: configurable fake numa node sizes

Extends the numa=fake x86_64 command-line option to allow for configurable
node sizes.  These nodes can be used in conjunction with cpusets for coarse
memory resource management.

The old command-line option is still supported:
  numa=fake=32	gives 32 fake NUMA nodes, ignoring the NUMA setup of the
		actual machine.

But now you may configure your system for the node sizes of your choice:
  numa=fake=2*512,1024,2*256
		gives two 512M nodes, one 1024M node, two 256M nodes, and
		the rest of system memory to a sixth node.

The existing hash function is maintained to support the various node sizes
that are possible with this implementation.

Each node of the same size receives roughly the same amount of available
pages, regardless of any reserved memory with its address range.  The total
available pages on the system is calculated and divided by the number of equal
nodes to allocate.  These nodes are then dynamically allocated and their
borders extended until such time as their number of available pages reaches
the required size.

Configurable node sizes are recommended when used in conjunction with cpusets
for memory control because it eliminates the overhead associated with scanning
the zonelists of many smaller full nodes on page_alloc().

Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/x86_64/boot-options.txt | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 85f51e5a749..7500aad95f3 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -149,7 +149,13 @@ NUMA
 
   numa=noacpi   Don't parse the SRAT table for NUMA setup
 
-  numa=fake=X   Fake X nodes and ignore NUMA setup of the actual machine.
+  numa=fake=CMDLINE
+		If a number, fakes CMDLINE nodes and ignores NUMA setup of the
+		actual machine.  Otherwise, system memory is configured
+		depending on the sizes and coefficients listed.  For example:
+			numa=fake=2*512,1024,4*256
+		gives two 512M nodes, a 1024M node, and four 256M nodes.  The
+		remaining system RAM is allocated to an additional node.
 
   numa=hotadd=percent
 		Only allow hotadd memory to preallocate page structures upto
-- 
cgit v1.2.3


From 14694d736bb66d0ec250d05c81c6e98a19c229c6 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 2 May 2007 19:27:09 +0200
Subject: [PATCH] x86-64: split remaining fake nodes equally

Extends the numa=fake x86_64 command-line option to split the remaining
system memory into equal-sized nodes.

For example:
numa=fake=2*512,4*	gives two 512M nodes and the remaining system
			memory is split into four approximately equal
			chunks.

This is beneficial for systems where the exact size of RAM is unknown or not
necessarily relevant, but the granularity with which nodes shall be allocated
is known.

Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/x86_64/boot-options.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 7500aad95f3..12a9aacecaa 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -155,7 +155,9 @@ NUMA
 		depending on the sizes and coefficients listed.  For example:
 			numa=fake=2*512,1024,4*256
 		gives two 512M nodes, a 1024M node, and four 256M nodes.  The
-		remaining system RAM is allocated to an additional node.
+		remaining system RAM is allocated to an additional node.  If
+		the last character of CMDLINE is a *, the remaining system RAM
+		is instead divided up equally among its coefficient.
 
   numa=hotadd=percent
 		Only allow hotadd memory to preallocate page structures upto
-- 
cgit v1.2.3


From 382591d500bbcd20a44416c5e0e292708468587c Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 2 May 2007 19:27:09 +0200
Subject: [PATCH] x86-64: fixed size remaining fake nodes

Extends the numa=fake x86_64 command-line option to split the remaining system
memory into nodes of fixed size.  Any leftover memory is allocated to a final
node unless the command-line ends with a comma.

For example:
  numa=fake=2*512,*128	gives two 512M nodes and the remaining system
			memory is split into nodes of 128M each.

This is beneficial for systems where the exact size of RAM is unknown or not
necessarily relevant, but the size of the remaining nodes to be allocated is
known based on their capacity for resource management.

Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/x86_64/boot-options.txt | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 12a9aacecaa..6177d881983 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -153,11 +153,15 @@ NUMA
 		If a number, fakes CMDLINE nodes and ignores NUMA setup of the
 		actual machine.  Otherwise, system memory is configured
 		depending on the sizes and coefficients listed.  For example:
-			numa=fake=2*512,1024,4*256
-		gives two 512M nodes, a 1024M node, and four 256M nodes.  The
-		remaining system RAM is allocated to an additional node.  If
-		the last character of CMDLINE is a *, the remaining system RAM
-		is instead divided up equally among its coefficient.
+			numa=fake=2*512,1024,4*256,*128
+		gives two 512M nodes, a 1024M node, four 256M nodes, and the
+		rest split into 128M chunks.  If the last character of CMDLINE
+		is a *, the remaining memory is divided up equally among its
+		coefficient:
+			numa=fake=2*512,2*
+		gives two 512M nodes and the rest split into two nodes.
+		Otherwise, the remaining system RAM is allocated to an
+		additional node.
 
   numa=hotadd=percent
 		Only allow hotadd memory to preallocate page structures upto
-- 
cgit v1.2.3


From 20280195f2a3d80c42a190959ca22108c93cd7e0 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 2 May 2007 19:27:09 +0200
Subject: [PATCH] x86-64: fake numa for cpusets document

Create a document to explain how to use numa=fake in conjunction with cpusets
for coarse memory resource management.

An attempt to get more awareness and testing for this feature.

Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/x86_64/fake-numa-for-cpusets | 66 ++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 Documentation/x86_64/fake-numa-for-cpusets

(limited to 'Documentation')

diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86_64/fake-numa-for-cpusets
new file mode 100644
index 00000000000..d1a985c5b00
--- /dev/null
+++ b/Documentation/x86_64/fake-numa-for-cpusets
@@ -0,0 +1,66 @@
+Using numa=fake and CPUSets for Resource Management
+Written by David Rientjes <rientjes@cs.washington.edu>
+
+This document describes how the numa=fake x86_64 command-line option can be used
+in conjunction with cpusets for coarse memory management.  Using this feature,
+you can create fake NUMA nodes that represent contiguous chunks of memory and
+assign them to cpusets and their attached tasks.  This is a way of limiting the
+amount of system memory that are available to a certain class of tasks.
+
+For more information on the features of cpusets, see Documentation/cpusets.txt.
+There are a number of different configurations you can use for your needs.  For
+more information on the numa=fake command line option and its various ways of
+configuring fake nodes, see Documentation/x86_64/boot-options.txt.
+
+For the purposes of this introduction, we'll assume a very primitive NUMA
+emulation setup of "numa=fake=4*512,".  This will split our system memory into
+four equal chunks of 512M each that we can now use to assign to cpusets.  As
+you become more familiar with using this combination for resource control,
+you'll determine a better setup to minimize the number of nodes you have to deal
+with.
+
+A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
+
+	Faking node 0 at 0000000000000000-0000000020000000 (512MB)
+	Faking node 1 at 0000000020000000-0000000040000000 (512MB)
+	Faking node 2 at 0000000040000000-0000000060000000 (512MB)
+	Faking node 3 at 0000000060000000-0000000080000000 (512MB)
+	...
+	On node 0 totalpages: 130975
+	On node 1 totalpages: 131072
+	On node 2 totalpages: 131072
+	On node 3 totalpages: 131072
+
+Now following the instructions for mounting the cpusets filesystem from
+Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
+address spaces) to individual cpusets:
+
+	[root@xroads /]# mkdir exampleset
+	[root@xroads /]# mount -t cpuset none exampleset
+	[root@xroads /]# mkdir exampleset/ddset
+	[root@xroads /]# cd exampleset/ddset
+	[root@xroads /exampleset/ddset]# echo 0-1 > cpus
+	[root@xroads /exampleset/ddset]# echo 0-1 > mems
+
+Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for
+memory allocations (1G).
+
+You can now assign tasks to these cpusets to limit the memory resources
+available to them according to the fake nodes assigned as mems:
+
+	[root@xroads /exampleset/ddset]# echo $$ > tasks
+	[root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
+	[1] 13425
+
+Notice the difference between the system memory usage as reported by
+/proc/meminfo between the restricted cpuset case above and the unrestricted
+case (i.e. running the same 'dd' command without assigning it to a fake NUMA
+cpuset):
+				Unrestricted	Restricted
+	MemTotal:		3091900 kB	3091900 kB
+	MemFree:		  42113 kB	1513236 kB
+
+This allows for coarse memory management for the tasks you assign to particular
+cpusets.  Since cpusets can form a hierarchy, you can create some pretty
+interesting combinations of use-cases for various classes of tasks for your
+memory management needs.
-- 
cgit v1.2.3


From 8f9aeca7a081d81c4c9862be1e04f15b5ab5461f Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Wed, 2 May 2007 19:27:10 +0200
Subject: [PATCH] x86: add command line length to boot protocol

Because the command line is increased to 2048 characters after 2.6.21, it's
not possible for boot loaders and userspace tools to determine the length
of the command line the kernel can understand.  The benefit of knowing the
length is that users can be warned if the command line size is too long
which prevents surprise if things don't work after bootup.

This patch updates the boot protocol to contain a field called
"cmdline_size" that contain the length of the command line (excluding the
terminating zero).

The patch also adds missing fields (of protocol version 2.05) to the x86_64
setup code.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Alon Bar-Lev <alon.barlev@gmail.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/i386/boot.txt | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index 38fe1f03fb1..6498666ea33 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -2,7 +2,7 @@
 		     ----------------------------
 
 		    H. Peter Anvin <hpa@zytor.com>
-			Last update 2007-01-26
+			Last update 2007-03-06
 
 On the i386 platform, the Linux kernel uses a rather complicated boot
 convention.  This has evolved partially due to historical aspects, as
@@ -35,9 +35,13 @@ Protocol 2.03:	(Kernel 2.4.18-pre1) Explicitly makes the highest possible
 		initrd address available to the bootloader.
 
 Protocol 2.04:	(Kernel 2.6.14) Extend the syssize field to four bytes.
+
 Protocol 2.05:	(Kernel 2.6.20) Make protected mode kernel relocatable.
 		Introduce relocatable_kernel and kernel_alignment fields.
 
+Protocol 2.06:	(Kernel 2.6.22) Added a field that contains the size of
+		the boot command line
+
 
 **** MEMORY LAYOUT
 
@@ -133,6 +137,8 @@ Offset	Proto	Name		Meaning
 022C/4	2.03+	initrd_addr_max	Highest legal initrd address
 0230/4	2.05+	kernel_alignment Physical addr alignment required for kernel
 0234/1	2.05+	relocatable_kernel Whether kernel is relocatable or not
+0235/3	N/A	pad2		Unused
+0238/4	2.06+	cmdline_size	Maximum size of the kernel command line
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -233,6 +239,12 @@ filled out, however:
 	if your ramdisk is exactly 131072 bytes long and this field is
 	0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
 
+  cmdline_size:
+	The maximum size of the command line without the terminating
+	zero. This means that the command line can contain at most
+	cmdline_size characters. With protocol version 2.05 and
+	earlier, the maximum size was 255.
+
 
 **** THE KERNEL COMMAND LINE
 
@@ -241,11 +253,10 @@ loader to communicate with the kernel.  Some of its options are also
 relevant to the boot loader itself, see "special command line options"
 below.
 
-The kernel command line is a null-terminated string currently up to
-255 characters long, plus the final null.  A string that is too long
-will be automatically truncated by the kernel, a boot loader may allow
-a longer command line to be passed to permit future kernels to extend
-this limit.
+The kernel command line is a null-terminated string. The maximum
+length can be retrieved from the field cmdline_size.  Before protocol
+version 2.06, the maximum was 255 characters.  A string that is too
+long will be automatically truncated by the kernel.
 
 If the boot protocol version is 2.02 or later, the address of the
 kernel command line is given by the header field cmd_line_ptr (see
-- 
cgit v1.2.3


From 1dbf527c51c6c20c19869c8125cb5b87c3d09506 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 2 May 2007 19:27:12 +0200
Subject: [PATCH] i386: Make COMPAT_VDSO runtime selectable.

Now that relocation of the VDSO for COMPAT_VDSO users is done at
runtime rather than compile time, it is possible to enable/disable
compat mode at runtime.

This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the
kernel command line, or via sysctl.  (Switching on a running system
shouldn't be done lightly; any process which was relying on the compat
VDSO will be upset if it goes away.)

The COMPAT_VDSO config option still exists, but if enabled it just
makes vdso_enabled default to VDSO_COMPAT.

+From: Hugh Dickins <hugh@veritas.com>

Fix oops from i386-make-compat_vdso-runtime-selectable.patch.

Even mingetty at system startup finds it easy to trigger an oops
while reading /proc/PID/maps: though it has a good hold on the mm
itself, that cannot stop exit_mm() from resetting tsk->mm to NULL.

(It is usually show_map()'s call to get_gate_vma() which oopses,
and I expect we could change that to check priv->tail_vma instead;
but no matter, even m_start()'s call just after get_task_mm() is racy.)

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
---
 Documentation/kernel-parameters.txt | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 84c3bd05c63..4287696f18d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1820,6 +1820,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			[USBHID] The interval which mice are to be polled at.
 
 	vdso=		[IA-32,SH]
+			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
 			vdso=1: enable VDSO (default)
 			vdso=0: disable VDSO mapping
 
-- 
cgit v1.2.3


From f039b754714a422959027cb18bb33760eb8153f0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 2 May 2007 19:27:12 +0200
Subject: [PATCH] x86: Don't use MWAIT on AMD Family 10

It doesn't put the CPU into deeper sleep states, so it's better to use the standard
idle loop to save power. But allow to reenable it anyways for benchmarking.

I also removed the obsolete idle=halt on i386

Cc: andreas.herrmann@amd.com

Signed-off-by: Andi Kleen <ak@suse.de>
---
 Documentation/kernel-parameters.txt | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4287696f18d..94ce0d20253 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -695,8 +695,15 @@ and is between 256 and 4096 characters. It is defined in the file
 	idebus=		[HW] (E)IDE subsystem - VLB/PCI bus speed
 			See Documentation/ide.txt.
 
-	idle=		[HW]
-			Format: idle=poll or idle=halt
+	idle=		[X86]
+			Format: idle=poll or idle=mwait
+			Poll forces a polling idle loop that can slightly improves the performance
+			of waking up a idle CPU, but will use a lot of power and make the system
+			run hot. Not recommended.
+			idle=mwait. On systems which support MONITOR/MWAIT but the kernel chose
+			to not use it because it doesn't save as much power as a normal idle
+			loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
+			as idle=poll.
 
 	ignore_loglevel	[KNL]
 			Ignore loglevel setting - this will print /all/
-- 
cgit v1.2.3


From b7fb4af06c18496950a45b365f7a09c47ea64c17 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 2 May 2007 19:27:13 +0200
Subject: [PATCH] i386: Allow boot-time disable of SMP altinstructions

Add "noreplace-smp" to disable SMP instruction replacement.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 Documentation/kernel-parameters.txt | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 94ce0d20253..242b3a09b6c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1164,6 +1164,9 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nomce		[IA-32] Machine Check Exception
 
+	noreplace-smp	[IA-32,SMP] Don't replace SMP instructions
+			with UP alternatives
+
 	noresidual	[PPC] Don't use residual data on PReP machines.
 
 	noresume	[SWSUSP] Disables resume and restores original swap
@@ -1569,6 +1572,9 @@ and is between 256 and 4096 characters. It is defined in the file
 	smart2=		[HW]
 			Format: <io1>[,<io2>[,...,<io8>]]
 
+	smp-alt-once	[IA-32,SMP] On a hotplug CPU system, only
+			attempt to substitute SMP alternatives once at boot.
+
 	snd-ad1816a=	[HW,ALSA]
 
 	snd-ad1848=	[HW,ALSA]
-- 
cgit v1.2.3


From 959b4fdfe7e27bcf101e2381e500e4076f2bb9ce Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 2 May 2007 19:27:16 +0200
Subject: [PATCH] i386: PARAVIRT: Allow boot-time disable of paravirt_ops
 patching

Add "noreplace-paravirt" to disable paravirt_ops patching.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 242b3a09b6c..38d7db3262c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -64,6 +64,7 @@ parameter is applicable:
 	GENERIC_TIME The generic timeofday code is enabled.
 	NFS	Appropriate NFS support is enabled.
 	OSS	OSS sound support is enabled.
+	PV_OPS	A paravirtualized kernel
 	PARIDE	The ParIDE subsystem is enabled.
 	PARISC	The PA-RISC architecture is enabled.
 	PCI	PCI bus support is enabled.
@@ -1164,6 +1165,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nomce		[IA-32] Machine Check Exception
 
+	noreplace-paravirt	[IA-32,PV_OPS] Don't patch paravirt_ops
+
 	noreplace-smp	[IA-32,SMP] Don't replace SMP instructions
 			with UP alternatives
 
-- 
cgit v1.2.3


From 8a336b0a4b6dfacc8cc5fd617ba1e1904077de2d Mon Sep 17 00:00:00 2001
From: Tim Hockin <thockin@google.com>
Date: Wed, 2 May 2007 19:27:19 +0200
Subject: [PATCH] x86-64: Dynamically adjust machine check interval

Background:
 We've found that MCEs (specifically DRAM SBEs) tend to come in bunches,
 especially when we are trying really hard to stress the system out.  The
 current MCE poller uses a static interval which does not care whether it
 has or has not found MCEs recently.

Description:
 This patch makes the MCE poller adjust the polling interval dynamically.
 If we find an MCE, poll 2x faster (down to 10 ms).  When we stop finding
 MCEs, poll 2x slower (up to check_interval seconds).  The check_interval
 tunable becomes the max polling interval.  The "Machine check events
 logged" printk() is rate limited to the check_interval, which should be
 identical behavior to the old functionality.

Result:
 If you start to take a lot of correctable errors (not exceptions), you
 log them faster and more accurately (less chance of overflowing the MCA
 registers).  If you don't take a lot of errors, you will see no change.

Alternatives:
 I considered simply reducing the polling interval to 10 ms immediately
 and keeping it there as long as we continue to find errors.  This felt a
 bit heavy handed, but does perform significantly better for the default
 check_interval of 5 minutes (we're using a few seconds when testing for
 DRAM errors).  I could be convinced to go with this, if anyone felt it
 was not too aggressive.

Testing:
 I used an error-injecting DIMM to create lots of correctable DRAM errors
 and verified that the polling interval accelerates.  The printk() only
 happens once per check_interval seconds.

Patch:
 This patch is against 2.6.21-rc7.

Signed-Off-By: Tim Hockin <thockin@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 Documentation/x86_64/machinecheck | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck
index 068a6d9904b..feaeaf6f6e4 100644
--- a/Documentation/x86_64/machinecheck
+++ b/Documentation/x86_64/machinecheck
@@ -36,7 +36,12 @@ between all CPUs.
 
 check_interval
 	How often to poll for corrected machine check errors, in seconds
-	(Note output is hexademical). Default 5 minutes.
+	(Note output is hexademical). Default 5 minutes.  When the poller
+	finds MCEs it triggers an exponential speedup (poll more often) on
+	the polling interval.  When the poller stops finding MCEs, it
+	triggers an exponential backoff (poll less often) on the polling
+	interval. The check_interval variable is both the initial and
+	maximum polling interval.
 
 tolerant
 	Tolerance level. When a machine check exception occurs for a non
-- 
cgit v1.2.3


From b466004a660c490f3bfb12be8b5ca18bfc393261 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 2 May 2007 19:27:21 +0200
Subject: [PATCH] x86-64: Don't exclude asm-offsets.c in Documentation/dontdiff

asm-offsets.c is valid source code and needs to be diffed.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 Documentation/dontdiff | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 63c2d0c55aa..64e9f6c4826 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -55,8 +55,8 @@ aic7*seq.h*
 aicasm
 aicdb.h*
 asm
-asm-offsets.*
-asm_offsets.*
+asm-offsets.h
+asm_offsets.h
 autoconf.h*
 bbootsect
 bin2c
-- 
cgit v1.2.3


From ded2e1640ffaee26c054a42e5210c1086fb1d8eb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 20 Mar 2007 09:47:47 -0500
Subject: kbuild: small documentation fix in Documentation/kbuild/modules.txt

The Makefile fragment in Documentation/kbuild/modules.txt looks to be
missing some braces.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 Documentation/kbuild/modules.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index 769ee05ee4d..1d247d59ad5 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -249,7 +249,7 @@ following files:
 		--> filename: Makefile
 		KERNELDIR := /lib/modules/`uname -r`/build
 		all::
-			$(MAKE) -C $KERNELDIR M=`pwd` $@
+			$(MAKE) -C $(KERNELDIR) M=`pwd` $@
 
 		# Module specific targets
 		genbin:
-- 
cgit v1.2.3


From e711db3edfe7c0e32b6430e7d041905f856aa79a Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@neptun.ravnborg.org>
Date: Fri, 13 Apr 2007 23:07:00 +0200
Subject: kbuild: fix make mrproper for Documentation/DocBook/man

"make mandocs" generate > 2000 files in Documentation/DocBook/man
and this caused kbuild to barf out during make mrproper like this:

make -f scripts/Makefile.clean obj=Documentation/DocBook
make -f scripts/Makefile.clean obj=Documentation/DocBook/man/
make[2]: execvp: /bin/sh: Argument list too long
make[2]: *** [__clean] Error 127
make[1]: *** [Documentation/DocBook/man/] Error 2
make: *** [_mrproper_Documentation/DocBook] Error 2

The man directory were solely used for output
so the fix is to remove it entirely during the
make mrproper process.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 Documentation/DocBook/Makefile     | 7 ++-----
 Documentation/DocBook/man/Makefile | 3 ---
 2 files changed, 2 insertions(+), 8 deletions(-)
 delete mode 100644 Documentation/DocBook/man/Makefile

(limited to 'Documentation')

diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 867608ab3ca..960f4025ee8 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -152,6 +152,7 @@ quiet_cmd_db2man = MAN     $@
 	@(which xmlto > /dev/null 2>&1) || \
 	 (echo "*** You need to install xmlto ***"; \
 	  exit 1)
+	$(Q)mkdir -p $(obj)/man
 	$(call cmd,db2man)
 	@touch $@
 
@@ -212,11 +213,7 @@ clean-files := $(DOCBOOKS) \
 	$(patsubst %.xml, %.9,    $(DOCBOOKS)) \
 	$(C-procfs-example)
 
-clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS))
-
-#man put files in man subdir - traverse down
-subdir- := man/
-
+clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man
 
 # Declare the contents of the .PHONY variable as phony.  We keep that
 # information in a variable se we can use it in if_changed and friends.
diff --git a/Documentation/DocBook/man/Makefile b/Documentation/DocBook/man/Makefile
deleted file mode 100644
index 4fb7ea0f7ac..00000000000
--- a/Documentation/DocBook/man/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Rules are put in Documentation/DocBook
-
-clean-files := *.9.gz *.sgml manpage.links manpage.refs
-- 
cgit v1.2.3


From f15a3ccdc800cef08b346fef5f96860a05e7a3fa Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 11 Apr 2007 08:44:12 -0700
Subject: kernel-doc: alphabetically-sorted entries in index.html of 'htmldocs'

Make docbook index.html contain sorted output.

I prefer to let the computer do it.  This also avoids
people not reading the comment(s).

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 Documentation/DocBook/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 960f4025ee8..10b5cd6c54a 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -41,7 +41,7 @@ psdocs: $(PS)
 PDF := $(patsubst %.xml, %.pdf, $(BOOKS))
 pdfdocs: $(PDF)
 
-HTML := $(patsubst %.xml, %.html, $(BOOKS))
+HTML := $(sort $(patsubst %.xml, %.html, $(BOOKS)))
 htmldocs: $(HTML)
 
 MAN := $(patsubst %.xml, %.9, $(BOOKS))
-- 
cgit v1.2.3


From 3265b54556b2d8ed4e9612b08edb592b60205c40 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Tue, 1 May 2007 11:00:19 +0200
Subject: DOC: Fix wrong identifier name in
 Documentation/driver-model/devres.txt

Above and below we talk about my_midlayer_create_something, I assume that is
also meant here.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/driver-model/devres.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 5163b85308f..6c8d8f27db3 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -182,7 +182,7 @@ For example, you can do something like the following.
 
 	...
 
-	devres_close_group(dev, my_midlayer_something);
+	devres_close_group(dev, my_midlayer_create_something);
 	return 0;
   }
 
-- 
cgit v1.2.3


From 6ba186361ed2cda7e174856a3ab8a8e3237b3c3d Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 7 Apr 2007 17:21:28 +0200
Subject: PCI: Require vendor and device for new_id

Currently, there is no minimum number of fields required when adding
a new device ID to a PCI driver through the new_id sysfs file. It is
possible to add a new ID with only the vendor ID set, causing the
driver to attempt to attach to all PCI devices from that vendor. This
has been reported to happen accidentally:
  http://lists.lm-sensors.org/pipermail/lm-sensors/2007-March/019366.html
It is even possible to not even set the vendor ID field, causing the
driver to attempt to attach to _all_ the PCI devices.

This sounds dangerous and I fail to see any valid use of this
"feature". Thus I suggest that we now require at least the first two
fields (vendor ID and device ID) to be set. For what it's worth, this
is what the USB subsystem does.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/pci.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/pci.txt b/Documentation/pci.txt
index cdf2f3c0ab1..a8ded1a8bd6 100644
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -163,9 +163,9 @@ echo "vendor device subvendor subdevice class class_mask driver_data" > \
 /sys/bus/pci/drivers/{driver}/new_id
 
 All fields are passed in as hexadecimal values (no leading 0x).
-Users need pass only as many fields as necessary:
-	o vendor, device, subvendor, and subdevice fields default
-	  to PCI_ANY_ID (FFFFFFFF),
+The vendor and device fields are mandatory, the others are optional. Users
+need pass only as many optional fields as necessary:
+	o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
 	o class and classmask fields default to 0
 	o driver_data defaults to 0UL.
 
-- 
cgit v1.2.3


From 54eee4c5bf553ad54ba200d00487b61eb6b155f6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 4 Apr 2007 21:35:39 -0700
Subject: PCI Documentation: power/pci.txt fix copy/paste error

Correct function name copy-paste error.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/power/pci.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
index b6a3cbf7e84..e00b099a4b8 100644
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.txt
@@ -203,7 +203,7 @@ resume
 
 Usage:
 
-if (dev->driver && dev->driver->suspend)
+if (dev->driver && dev->driver->resume)
 	dev->driver->resume(dev)
 
 The resume callback may be called from any power state, and is always meant to
-- 
cgit v1.2.3


From 5adc55da4a7758021bcc374904b0f8b076508a11 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 27 Mar 2007 03:02:51 +0200
Subject: PCI: remove the broken PCI_MULTITHREAD_PROBE option

This patch removes the PCI_MULTITHREAD_PROBE option that had already
been marked as broken.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/pci.txt | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/pci.txt b/Documentation/pci.txt
index a8ded1a8bd6..40c4717d236 100644
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -124,10 +124,6 @@ initialization with a pointer to a structure describing the driver
 
 	err_handler	See Documentation/pci-error-recovery.txt
 
-	multithread_probe	Enable multi-threaded probe/scan. Driver must
-			provide its own locking/syncronization for init
-			operations if this is enabled.
-
 
 The ID table is an array of struct pci_device_id entries ending with an
 all-zero entry.  Each entry consists of:
-- 
cgit v1.2.3


From ecf36501bc4ad399e6df2e0bdaa513a2d510b7ec Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 6 Apr 2007 12:19:48 +0200
Subject: PCI: the overdue removal of pci_module_init()

Unless we finally completely remove it, people will always add new users.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/feature-removal-schedule.txt | 7 -------
 Documentation/pci.txt                      | 2 --
 2 files changed, 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5c88ba1ea26..144058cf849 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -117,13 +117,6 @@ Who:   Adrian Bunk <bunk@stusta.de>
 
 ---------------------------
 
-What:	pci_module_init(driver)
-When:	January 2007
-Why:	Is replaced by pci_register_driver(pci_driver).
-Who:	Richard Knutsson <ricknu-0@student.ltu.se> and Greg Kroah-Hartman <gregkh@suse.de>
-
----------------------------
-
 What:	Usage of invalid timevals in setitimer
 When:	March 2007
 Why:	POSIX requires to validate timevals in the setitimer call. This
diff --git a/Documentation/pci.txt b/Documentation/pci.txt
index 40c4717d236..e2c9d0a0c43 100644
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -545,8 +545,6 @@ pci_find_slot()			Find pci_dev corresponding to given bus and
 pci_set_power_state()		Set PCI Power Management state (0=D0 ... 3=D3)
 pci_find_capability()		Find specified capability in device's capability
 				list.
-pci_module_init()		Inline helper function for ensuring correct
-				pci_driver initialization and error handling.
 pci_resource_start()		Returns bus start address for a given PCI region
 pci_resource_end()		Returns bus end address for a given PCI region
 pci_resource_len()		Returns the byte length of a PCI region
-- 
cgit v1.2.3


From 52706ec903dcc7679acf5b93400d68fbc5384553 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 4 May 2007 18:47:50 +0200
Subject: [S390] cio: Deprecate read_dev_chars() and read_conf_data{,_lpm}().

These helper functions are a leftover from 2.4 sync I/O and are a
notorious source for bugs. They lead to device driver specific code
creeping into cio, and some issues can't really be fixed at all.

Device drivers can easily implement those functions themselves in a
more robust manner, so let's get rid of them.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 Documentation/feature-removal-schedule.txt | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5c88ba1ea26..1a9e600a73a 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -314,3 +314,20 @@ Why:	Code was merged, then submitter immediately disappeared leaving
 Who:	David S. Miller <davem@davemloft.net>
 
 ---------------------------
+
+What:	read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
+When:	December 2007
+Why:	These functions are a leftover from 2.4 times. They have several
+	problems:
+	- Duplication of checks that are done in the device driver's
+	  interrupt handler
+	- common I/O layer can't do device specific error recovery
+	- device driver can't be notified for conditions happening during
+	  execution of the function
+	Device drivers should issue the read device characteristics and read
+	configuration data ccws and do the appropriate error handling
+	themselves.
+Who:	Cornelia Huck <cornelia.huck@de.ibm.com>
+
+---------------------------
+
-- 
cgit v1.2.3


From 8bc8493063f938c932819958a7b5a0d56046bc96 Mon Sep 17 00:00:00 2001
From: Stuart MacDonald <stuartm@connecttech.com>
Date: Fri, 4 May 2007 16:00:03 -0400
Subject: MAINTAINER change for Connect Tech Inc

I am no longer with CTI. The Support Department will handle all
inquiries regarding the WH.

Signed-off-by: Stuart MacDonald <stuartm@connecttech.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/usb/usb-serial.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt
index d61f6e7865d..b18e86a2250 100644
--- a/Documentation/usb/usb-serial.txt
+++ b/Documentation/usb/usb-serial.txt
@@ -42,7 +42,7 @@ ConnectTech WhiteHEAT 4 port converter
   http://www.connecttech.com
 
   For any questions or problems with this driver, please contact
-  Stuart MacDonald at stuartm@connecttech.com
+  Connect Tech's Support Department at support@connecttech.com
 
 
 HandSpring Visor, Palm USB, and Cli� USB driver
-- 
cgit v1.2.3


From d29c91c70bc7790b112119135fae7690cbf17577 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Wed, 14 Mar 2007 14:25:49 +0900
Subject: doc: Update sysrq doc for sh kgdb trigger.

sh uses the same sysrq trigger as ppc, update the documentation to
reflect that.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 Documentation/sysrq.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index d43aa9d3c10..ba328f25541 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -1,6 +1,6 @@
 Linux Magic System Request Key Hacks
 Documentation for sysrq.c
-Last update: 2007-JAN-06
+Last update: 2007-MAR-14
 
 *  What is the magic SysRq key?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -75,7 +75,7 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'f'	- Will call oom_kill to kill a memory hog process.
 
-'g'	- Used by kgdb on ppc platforms.
+'g'	- Used by kgdb on ppc and sh platforms.
 
 'h'     - Will display help (actually any other key than those listed
           above will display help. but 'h' is easy to remember :-)
-- 
cgit v1.2.3


From 1929cb340b74904c130fdf3de3fe5bbedb68a5aa Mon Sep 17 00:00:00 2001
From: dmitry pervushin <dimka@nomadgs.com>
Date: Tue, 24 Apr 2007 13:39:09 +0900
Subject: sh: SH7722 clock framework support.

This adds support for the SH7722 (MobileR) to the clock framework.

Signed-off-by: dmitry pervushin <dimka@nomadgs.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 Documentation/sh/clk.txt | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 Documentation/sh/clk.txt

(limited to 'Documentation')

diff --git a/Documentation/sh/clk.txt b/Documentation/sh/clk.txt
new file mode 100644
index 00000000000..9aef710e9a4
--- /dev/null
+++ b/Documentation/sh/clk.txt
@@ -0,0 +1,32 @@
+Clock framework on SuperH architecture
+
+The framework on SH extends existing API by the function clk_set_rate_ex,
+which prototype is as follows:
+
+    clk_set_rate_ex (struct clk *clk, unsigned long rate, int algo_id)
+
+The algo_id parameter is used to specify algorithm used to recalculate clocks,
+adjanced to clock, specified as first argument. It is assumed that algo_id==0
+means no changes to adjanced clock
+
+Internally, the clk_set_rate_ex forwards request to clk->ops->set_rate method,
+if it is present in ops structure. The method should set the clock rate and adjust
+all needed clocks according to the passed algo_id.
+Exact values for algo_id are machine-dependend. For the sh7722, the following
+values are defined:
+
+	NO_CHANGE	= 0,
+	IUS_N1_N1,	/* I:U = N:1, U:Sh = N:1 */
+	IUS_322,	/* I:U:Sh = 3:2:2	 */
+	IUS_522,	/* I:U:Sh = 5:2:2 	 */
+	IUS_N11,	/* I:U:Sh = N:1:1	 */
+	SB_N1,		/* Sh:B = N:1		 */
+	SB3_N1,		/* Sh:B3 = N:1		 */
+	SB3_32,		/* Sh:B3 = 3:2		 */
+	SB3_43,		/* Sh:B3 = 4:3		 */
+	SB3_54,		/* Sh:B3 = 5:4		 */
+	BP_N1,		/* B:P	 = N:1		 */
+	IP_N1		/* I:P	 = N:1		 */
+
+Each of these constants means relation between clocks that can be set via the FRQCR
+register
-- 
cgit v1.2.3


From 6179b5562d5d17c7c09b54cb11dd925ca308d7a9 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bwalle@suse.de>
Date: Sun, 6 May 2007 14:48:44 -0700
Subject: add new_id to PCMCIA drivers

PCI drivers have the new_id file in sysfs which allows new IDs to be added
at runtime.  The advantage is to avoid re-compilation of a driver that
works for a new device, but it's ID table doesn't contain the new device.
This mechanism is only meant for testing, after the driver has been tested
successfully, the ID should be added in source code so that new revisions
of the kernel automatically detect the device.

The implementation follows the PCI implementation. The interface is documented
in Documentation/pcmcia/driver.txt. Computations should be done in userspace,
so the sysfs string contains the raw structure members for matching.

Signed-off-by: Bernhard Walle <bwalle@suse.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/pcmcia/driver.txt | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 Documentation/pcmcia/driver.txt

(limited to 'Documentation')

diff --git a/Documentation/pcmcia/driver.txt b/Documentation/pcmcia/driver.txt
new file mode 100644
index 00000000000..0ac16792077
--- /dev/null
+++ b/Documentation/pcmcia/driver.txt
@@ -0,0 +1,30 @@
+PCMCIA Driver
+-------------
+
+
+sysfs
+-----
+
+New PCMCIA IDs may be added to a device driver pcmcia_device_id table at
+runtime as shown below:
+
+echo "match_flags manf_id card_id func_id function device_no \
+prod_id_hash[0] prod_id_hash[1] prod_id_hash[2] prod_id_hash[3]" > \
+/sys/bus/pcmcia/drivers/{driver}/new_id
+
+All fields are passed in as hexadecimal values (no leading 0x).
+The meaning is described in the PCMCIA specification, the match_flags is
+a bitwise or-ed combination from PCMCIA_DEV_ID_MATCH_* constants
+defined in include/linux/mod_devicetable.h.
+
+Once added, the driver probe routine will be invoked for any unclaimed
+PCMCIA device listed in its (newly updated) pcmcia_device_id list.
+
+A common use-case is to add a new device according to the manufacturer ID
+and the card ID (form the manf_id and card_id file in the device tree).
+For this, just use:
+
+echo "0x3 manf_id card_id 0 0 0 0 0 0 0" > \
+        /sys/bus/pcmcia/drivers/{driver}/new_id
+
+after loading the driver.
-- 
cgit v1.2.3


From b813e931b4c8235bb42e301096ea97dbdee3e8fe Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Sun, 6 May 2007 14:49:24 -0700
Subject: smaps: add clear_refs file to clear reference

Adds /proc/pid/clear_refs.  When any non-zero number is written to this file,
pte_mkold() and ClearPageReferenced() is called for each pte and its
corresponding page, respectively, in that task's VMAs.  This file is only
writable by the user who owns the task.

It is now possible to measure _approximately_ how much memory a task is using
by clearing the reference bits with

	echo 1 > /proc/pid/clear_refs

and checking the reference count for each VMA from the /proc/pid/smaps output
at a measured time interval.  For example, to observe the approximate change
in memory footprint for a task, write a script that clears the references
(echo 1 > /proc/pid/clear_refs), sleeps, and then greps for Pgs_Referenced and
extracts the size in kB.  Add the sizes for each VMA together for the total
referenced footprint.  Moments later, repeat the process and observe the
difference.

For example, using an efficient Mozilla:

	accumulated time		referenced memory
	----------------		-----------------
		 0 s				 408 kB
		 1 s				 408 kB
		 2 s				 556 kB
		 3 s				1028 kB
		 4 s				 872 kB
		 5 s				1956 kB
		 6 s				 416 kB
		 7 s				1560 kB
		 8 s				2336 kB
		 9 s				1044 kB
		10 s				 416 kB

This is a valuable tool to get an approximate measurement of the memory
footprint for a task.

Cc: Hugh Dickins <hugh@veritas.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
[akpm@linux-foundation.org: build fixes]
[mpm@selenic.com: rename for_each_pmd]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 7aaf09b86a5..3f4b226572e 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -122,21 +122,22 @@ subdirectory has the entries listed in Table 1-1.
 
 Table 1-1: Process specific entries in /proc 
 ..............................................................................
- File    Content                                        
- cmdline Command line arguments                         
- cpu	 Current and last cpu in which it was executed		(2.4)(smp)
- cwd	 Link to the current working directory
- environ Values of environment variables      
- exe	 Link to the executable of this process
- fd      Directory, which contains all file descriptors 
- maps	 Memory maps to executables and library files		(2.4)
- mem     Memory held by this process                    
- root	 Link to the root directory of this process
- stat    Process status                                 
- statm   Process memory status information              
- status  Process status in human readable form          
- wchan   If CONFIG_KALLSYMS is set, a pre-decoded wchan
- smaps	 Extension based on maps, presenting the rss size for each mapped file
+ File		Content
+ clear_refs	Clears page referenced bits shown in smaps output
+ cmdline	Command line arguments
+ cpu		Current and last cpu in which it was executed	(2.4)(smp)
+ cwd		Link to the current working directory
+ environ	Values of environment variables
+ exe		Link to the executable of this process
+ fd		Directory, which contains all file descriptors
+ maps		Memory maps to executables and library files	(2.4)
+ mem		Memory held by this process
+ root		Link to the root directory of this process
+ stat		Process status
+ statm		Process memory status information
+ status		Process status in human readable form
+ wchan		If CONFIG_KALLSYMS is set, a pre-decoded wchan
+ smaps		Extension based on maps, the rss size for each mapped file
 ..............................................................................
 
 For example, to get the status information of a process, all you have to do is
-- 
cgit v1.2.3


From 352434211dad370316155d90d7dab590519f465b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 6 May 2007 14:49:47 -0700
Subject: slub: user documentation

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/slub.txt | 113 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 Documentation/vm/slub.txt

(limited to 'Documentation')

diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
new file mode 100644
index 00000000000..727c8d81aea
--- /dev/null
+++ b/Documentation/vm/slub.txt
@@ -0,0 +1,113 @@
+Short users guide for SLUB
+--------------------------
+
+First of all slub should transparently replace SLAB. If you enable
+SLUB then everything should work the same (Note the word "should".
+There is likely not much value in that word at this point).
+
+The basic philosophy of SLUB is very different from SLAB. SLAB
+requires rebuilding the kernel to activate debug options for all
+SLABS. SLUB always includes full debugging but its off by default.
+SLUB can enable debugging only for selected slabs in order to avoid
+an impact on overall system performance which may make a bug more
+difficult to find.
+
+In order to switch debugging on one can add a option "slub_debug"
+to the kernel command line. That will enable full debugging for
+all slabs.
+
+Typically one would then use the "slabinfo" command to get statistical
+data and perform operation on the slabs. By default slabinfo only lists
+slabs that have data in them. See "slabinfo -h" for more options when
+running the command. slabinfo can be compiled with
+
+gcc -o slabinfo Documentation/vm/slabinfo.c
+
+Some of the modes of operation of slabinfo require that slub debugging
+be enabled on the command line. F.e. no tracking information will be
+available without debugging on and validation can only partially
+be performed if debugging was not switched on.
+
+Some more sophisticated uses of slub_debug:
+-------------------------------------------
+
+Parameters may be given to slub_debug. If none is specified then full
+debugging is enabled. Format:
+
+slub_debug=<Debug-Options>       Enable options for all slabs
+slub_debug=<Debug-Options>,<slab name>
+				Enable options only for select slabs
+
+Possible debug options are
+	F		Sanity checks on (enables SLAB_DEBUG_FREE. Sorry
+			SLAB legacy issues)
+	Z		Red zoning
+	P		Poisoning (object and padding)
+	U		User tracking (free and alloc)
+	T		Trace (please only use on single slabs)
+
+F.e. in order to boot just with sanity checks and red zoning one would specify:
+
+	slub_debug=FZ
+
+Trying to find an issue in the dentry cache? Try
+
+	slub_debug=,dentry_cache
+
+to only enable debugging on the dentry cache.
+
+Red zoning and tracking may realign the slab.  We can just apply sanity checks
+to the dentry cache with
+
+	slub_debug=F,dentry_cache
+
+In case you forgot to enable debugging on the kernel command line: It is
+possible to enable debugging manually when the kernel is up. Look at the
+contents of:
+
+/sys/slab/<slab name>/
+
+Look at the writable files. Writing 1 to them will enable the
+corresponding debug option. All options can be set on a slab that does
+not contain objects. If the slab already contains objects then sanity checks
+and tracing may only be enabled. The other options may cause the realignment
+of objects.
+
+Careful with tracing: It may spew out lots of information and never stop if
+used on the wrong slab.
+
+SLAB Merging
+------------
+
+If no debugging is specified then SLUB may merge similar slabs together
+in order to reduce overhead and increase cache hotness of objects.
+slabinfo -a displays which slabs were merged together.
+
+Getting more performance
+------------------------
+
+To some degree SLUB's performance is limited by the need to take the
+list_lock once in a while to deal with partial slabs. That overhead is
+governed by the order of the allocation for each slab. The allocations
+can be influenced by kernel parameters:
+
+slub_min_objects=x		(default 8)
+slub_min_order=x		(default 0)
+slub_max_order=x		(default 4)
+
+slub_min_objects allows to specify how many objects must at least fit
+into one slab in order for the allocation order to be acceptable.
+In general slub will be able to perform this number of allocations
+on a slab without consulting centralized resources (list_lock) where
+contention may occur.
+
+slub_min_order specifies a minim order of slabs. A similar effect like
+slub_min_objects.
+
+slub_max_order specified the order at which slub_min_objects should no
+longer be checked. This is useful to avoid SLUB trying to generate
+super large order pages to fit slub_min_objects of a slab cache with
+large object sizes into one high order page.
+
+
+Christoph Lameter, <clameter@sgi.com>, April 10, 2007
-- 
cgit v1.2.3


From c09d87517298fd01543739ba26987645deb4e6a9 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sun, 6 May 2007 14:49:48 -0700
Subject: slub: add slabinfo tool

Add the tool which gets reports about slabs to the VM documentation directory.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/slabinfo.c | 943 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 943 insertions(+)
 create mode 100644 Documentation/vm/slabinfo.c

(limited to 'Documentation')

diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
new file mode 100644
index 00000000000..41710ccf3a2
--- /dev/null
+++ b/Documentation/vm/slabinfo.c
@@ -0,0 +1,943 @@
+/*
+ * Slabinfo: Tool to get reports about slabs
+ *
+ * (C) 2007 sgi, Christoph Lameter <clameter@sgi.com>
+ *
+ * Compile by:
+ *
+ * gcc -o slabinfo slabinfo.c
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <regex.h>
+
+#define MAX_SLABS 500
+#define MAX_ALIASES 500
+#define MAX_NODES 1024
+
+struct slabinfo {
+	char *name;
+	int alias;
+	int refs;
+	int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
+	int hwcache_align, object_size, objs_per_slab;
+	int sanity_checks, slab_size, store_user, trace;
+	int order, poison, reclaim_account, red_zone;
+	unsigned long partial, objects, slabs;
+	int numa[MAX_NODES];
+	int numa_partial[MAX_NODES];
+} slabinfo[MAX_SLABS];
+
+struct aliasinfo {
+	char *name;
+	char *ref;
+	struct slabinfo *slab;
+} aliasinfo[MAX_ALIASES];
+
+int slabs = 0;
+int aliases = 0;
+int alias_targets = 0;
+int highest_node = 0;
+
+char buffer[4096];
+
+int show_alias = 0;
+int show_slab = 0;
+int skip_zero = 1;
+int show_numa = 0;
+int show_track = 0;
+int show_first_alias = 0;
+int validate = 0;
+int shrink = 0;
+int show_inverted = 0;
+int show_single_ref = 0;
+int show_totals = 0;
+int sort_size = 0;
+
+int page_size;
+
+regex_t pattern;
+
+void fatal(const char *x, ...)
+{
+	va_list ap;
+
+	va_start(ap, x);
+	vfprintf(stderr, x, ap);
+	va_end(ap);
+	exit(1);
+}
+
+void usage(void)
+{
+	printf("slabinfo [-ahnpvtsz] [slab-regexp]\n"
+		"-a|--aliases           Show aliases\n"
+		"-h|--help              Show usage information\n"
+		"-n|--numa              Show NUMA information\n"
+		"-s|--shrink            Shrink slabs\n"
+		"-v|--validate          Validate slabs\n"
+		"-t|--tracking          Show alloc/free information\n"
+		"-T|--Totals            Show summary information\n"
+		"-l|--slabs             Show slabs\n"
+		"-S|--Size              Sort by size\n"
+		"-z|--zero              Include empty slabs\n"
+		"-f|--first-alias       Show first alias\n"
+		"-i|--inverted          Inverted list\n"
+		"-1|--1ref              Single reference\n"
+	);
+}
+
+unsigned long read_obj(char *name)
+{
+	FILE *f = fopen(name, "r");
+
+	if (!f)
+		buffer[0] = 0;
+	else {
+		if (!fgets(buffer,sizeof(buffer), f))
+			buffer[0] = 0;
+		fclose(f);
+		if (buffer[strlen(buffer)] == '\n')
+			buffer[strlen(buffer)] = 0;
+	}
+	return strlen(buffer);
+}
+
+
+/*
+ * Get the contents of an attribute
+ */
+unsigned long get_obj(char *name)
+{
+	if (!read_obj(name))
+		return 0;
+
+	return atol(buffer);
+}
+
+unsigned long get_obj_and_str(char *name, char **x)
+{
+	unsigned long result = 0;
+	char *p;
+
+	*x = NULL;
+
+	if (!read_obj(name)) {
+		x = NULL;
+		return 0;
+	}
+	result = strtoul(buffer, &p, 10);
+	while (*p == ' ')
+		p++;
+	if (*p)
+		*x = strdup(p);
+	return result;
+}
+
+void set_obj(struct slabinfo *s, char *name, int n)
+{
+	char x[100];
+
+	sprintf(x, "%s/%s", s->name, name);
+
+	FILE *f = fopen(x, "w");
+
+	if (!f)
+		fatal("Cannot write to %s\n", x);
+
+	fprintf(f, "%d\n", n);
+	fclose(f);
+}
+
+/*
+ * Put a size string together
+ */
+int store_size(char *buffer, unsigned long value)
+{
+	unsigned long divisor = 1;
+	char trailer = 0;
+	int n;
+
+	if (value > 1000000000UL) {
+		divisor = 100000000UL;
+		trailer = 'G';
+	} else if (value > 1000000UL) {
+		divisor = 100000UL;
+		trailer = 'M';
+	} else if (value > 1000UL) {
+		divisor = 100;
+		trailer = 'K';
+	}
+
+	value /= divisor;
+	n = sprintf(buffer, "%ld",value);
+	if (trailer) {
+		buffer[n] = trailer;
+		n++;
+		buffer[n] = 0;
+	}
+	if (divisor != 1) {
+		memmove(buffer + n - 2, buffer + n - 3, 4);
+		buffer[n-2] = '.';
+		n++;
+	}
+	return n;
+}
+
+void decode_numa_list(int *numa, char *t)
+{
+	int node;
+	int nr;
+
+	memset(numa, 0, MAX_NODES * sizeof(int));
+
+	while (*t == 'N') {
+		t++;
+		node = strtoul(t, &t, 10);
+		if (*t == '=') {
+			t++;
+			nr = strtoul(t, &t, 10);
+			numa[node] = nr;
+			if (node > highest_node)
+				highest_node = node;
+		}
+		while (*t == ' ')
+			t++;
+	}
+}
+
+void slab_validate(struct slabinfo *s)
+{
+	set_obj(s, "validate", 1);
+}
+
+void slab_shrink(struct slabinfo *s)
+{
+	set_obj(s, "shrink", 1);
+}
+
+int line = 0;
+
+void first_line(void)
+{
+	printf("Name                 Objects   Objsize    Space "
+		"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
+}
+
+/*
+ * Find the shortest alias of a slab
+ */
+struct aliasinfo *find_one_alias(struct slabinfo *find)
+{
+	struct aliasinfo *a;
+	struct aliasinfo *best = NULL;
+
+	for(a = aliasinfo;a < aliasinfo + aliases; a++) {
+		if (a->slab == find &&
+			(!best || strlen(best->name) < strlen(a->name))) {
+				best = a;
+				if (strncmp(a->name,"kmall", 5) == 0)
+					return best;
+			}
+	}
+	if (best)
+		return best;
+	fatal("Cannot find alias for %s\n", find->name);
+	return NULL;
+}
+
+unsigned long slab_size(struct slabinfo *s)
+{
+	return 	s->slabs * (page_size << s->order);
+}
+
+
+void slabcache(struct slabinfo *s)
+{
+	char size_str[20];
+	char dist_str[40];
+	char flags[20];
+	char *p = flags;
+
+	if (skip_zero && !s->slabs)
+		return;
+
+	store_size(size_str, slab_size(s));
+	sprintf(dist_str,"%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs);
+
+	if (!line++)
+		first_line();
+
+	if (s->aliases)
+		*p++ = '*';
+	if (s->cache_dma)
+		*p++ = 'd';
+	if (s->hwcache_align)
+		*p++ = 'A';
+	if (s->poison)
+		*p++ = 'P';
+	if (s->reclaim_account)
+		*p++ = 'a';
+	if (s->red_zone)
+		*p++ = 'Z';
+	if (s->sanity_checks)
+		*p++ = 'F';
+	if (s->store_user)
+		*p++ = 'U';
+	if (s->trace)
+		*p++ = 'T';
+
+	*p = 0;
+	printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+		s->name, s->objects, s->object_size, size_str, dist_str,
+		s->objs_per_slab, s->order,
+		s->slabs ? (s->partial * 100) / s->slabs : 100,
+		s->slabs ? (s->objects * s->object_size * 100) /
+			(s->slabs * (page_size << s->order)) : 100,
+		flags);
+}
+
+void slab_numa(struct slabinfo *s)
+{
+	int node;
+
+	if (!highest_node)
+		fatal("No NUMA information available.\n");
+
+	if (skip_zero && !s->slabs)
+		return;
+
+	if (!line) {
+		printf("\nSlab             Node ");
+		for(node = 0; node <= highest_node; node++)
+			printf(" %4d", node);
+		printf("\n----------------------");
+		for(node = 0; node <= highest_node; node++)
+			printf("-----");
+		printf("\n");
+	}
+	printf("%-21s ", s->name);
+	for(node = 0; node <= highest_node; node++) {
+		char b[20];
+
+		store_size(b, s->numa[node]);
+		printf(" %4s", b);
+	}
+	printf("\n");
+	line++;
+}
+
+void show_tracking(struct slabinfo *s)
+{
+	printf("\n%s: Calls to allocate a slab object\n", s->name);
+	printf("---------------------------------------------------\n");
+	if (read_obj("alloc_calls"))
+		printf(buffer);
+
+	printf("%s: Calls to free a slab object\n", s->name);
+	printf("-----------------------------------------------\n");
+	if (read_obj("free_calls"))
+		printf(buffer);
+
+}
+
+void totals(void)
+{
+	struct slabinfo *s;
+
+	int used_slabs = 0;
+	char b1[20], b2[20], b3[20], b4[20];
+	unsigned long long max = 1ULL << 63;
+
+	/* Object size */
+	unsigned long long min_objsize = max, max_objsize = 0, avg_objsize;
+
+	/* Number of partial slabs in a slabcache */
+	unsigned long long min_partial = max, max_partial = 0,
+				avg_partial, total_partial = 0;
+
+	/* Number of slabs in a slab cache */
+	unsigned long long min_slabs = max, max_slabs = 0,
+				avg_slabs, total_slabs = 0;
+
+	/* Size of the whole slab */
+	unsigned long long min_size = max, max_size = 0,
+				avg_size, total_size = 0;
+
+	/* Bytes used for object storage in a slab */
+	unsigned long long min_used = max, max_used = 0,
+				avg_used, total_used = 0;
+
+	/* Waste: Bytes used for alignment and padding */
+	unsigned long long min_waste = max, max_waste = 0,
+				avg_waste, total_waste = 0;
+	/* Number of objects in a slab */
+	unsigned long long min_objects = max, max_objects = 0,
+				avg_objects, total_objects = 0;
+	/* Waste per object */
+	unsigned long long min_objwaste = max,
+				max_objwaste = 0, avg_objwaste,
+				total_objwaste = 0;
+
+	/* Memory per object */
+	unsigned long long min_memobj = max,
+				max_memobj = 0, avg_memobj,
+				total_objsize = 0;
+
+	/* Percentage of partial slabs per slab */
+	unsigned long min_ppart = 100, max_ppart = 0,
+				avg_ppart, total_ppart = 0;
+
+	/* Number of objects in partial slabs */
+	unsigned long min_partobj = max, max_partobj = 0,
+				avg_partobj, total_partobj = 0;
+
+	/* Percentage of partial objects of all objects in a slab */
+	unsigned long min_ppartobj = 100, max_ppartobj = 0,
+				avg_ppartobj, total_ppartobj = 0;
+
+
+	for (s = slabinfo; s < slabinfo + slabs; s++) {
+		unsigned long long size;
+		unsigned long used;
+		unsigned long long wasted;
+		unsigned long long objwaste;
+		long long objects_in_partial_slabs;
+		unsigned long percentage_partial_slabs;
+		unsigned long percentage_partial_objs;
+
+		if (!s->slabs || !s->objects)
+			continue;
+
+		used_slabs++;
+
+		size = slab_size(s);
+		used = s->objects * s->object_size;
+		wasted = size - used;
+		objwaste = s->slab_size - s->object_size;
+
+		objects_in_partial_slabs = s->objects -
+			(s->slabs - s->partial - s ->cpu_slabs) *
+			s->objs_per_slab;
+
+		if (objects_in_partial_slabs < 0)
+			objects_in_partial_slabs = 0;
+
+		percentage_partial_slabs = s->partial * 100 / s->slabs;
+		if (percentage_partial_slabs > 100)
+			percentage_partial_slabs = 100;
+
+		percentage_partial_objs = objects_in_partial_slabs * 100
+							/ s->objects;
+
+		if (percentage_partial_objs > 100)
+			percentage_partial_objs = 100;
+
+		if (s->object_size < min_objsize)
+			min_objsize = s->object_size;
+		if (s->partial < min_partial)
+			min_partial = s->partial;
+		if (s->slabs < min_slabs)
+			min_slabs = s->slabs;
+		if (size < min_size)
+			min_size = size;
+		if (wasted < min_waste)
+			min_waste = wasted;
+		if (objwaste < min_objwaste)
+			min_objwaste = objwaste;
+		if (s->objects < min_objects)
+			min_objects = s->objects;
+		if (used < min_used)
+			min_used = used;
+		if (objects_in_partial_slabs < min_partobj)
+			min_partobj = objects_in_partial_slabs;
+		if (percentage_partial_slabs < min_ppart)
+			min_ppart = percentage_partial_slabs;
+		if (percentage_partial_objs < min_ppartobj)
+			min_ppartobj = percentage_partial_objs;
+		if (s->slab_size < min_memobj)
+			min_memobj = s->slab_size;
+
+		if (s->object_size > max_objsize)
+			max_objsize = s->object_size;
+		if (s->partial > max_partial)
+			max_partial = s->partial;
+		if (s->slabs > max_slabs)
+			max_slabs = s->slabs;
+		if (size > max_size)
+			max_size = size;
+		if (wasted > max_waste)
+			max_waste = wasted;
+		if (objwaste > max_objwaste)
+			max_objwaste = objwaste;
+		if (s->objects > max_objects)
+			max_objects = s->objects;
+		if (used > max_used)
+			max_used = used;
+		if (objects_in_partial_slabs > max_partobj)
+			max_partobj = objects_in_partial_slabs;
+		if (percentage_partial_slabs > max_ppart)
+			max_ppart = percentage_partial_slabs;
+		if (percentage_partial_objs > max_ppartobj)
+			max_ppartobj = percentage_partial_objs;
+		if (s->slab_size > max_memobj)
+			max_memobj = s->slab_size;
+
+		total_partial += s->partial;
+		total_slabs += s->slabs;
+		total_size += size;
+		total_waste += wasted;
+
+		total_objects += s->objects;
+		total_used += used;
+		total_partobj += objects_in_partial_slabs;
+		total_ppart += percentage_partial_slabs;
+		total_ppartobj += percentage_partial_objs;
+
+		total_objwaste += s->objects * objwaste;
+		total_objsize += s->objects * s->slab_size;
+	}
+
+	if (!total_objects) {
+		printf("No objects\n");
+		return;
+	}
+	if (!used_slabs) {
+		printf("No slabs\n");
+		return;
+	}
+
+	/* Per slab averages */
+	avg_partial = total_partial / used_slabs;
+	avg_slabs = total_slabs / used_slabs;
+	avg_size = total_size / used_slabs;
+	avg_waste = total_waste / used_slabs;
+
+	avg_objects = total_objects / used_slabs;
+	avg_used = total_used / used_slabs;
+	avg_partobj = total_partobj / used_slabs;
+	avg_ppart = total_ppart / used_slabs;
+	avg_ppartobj = total_ppartobj / used_slabs;
+
+	/* Per object object sizes */
+	avg_objsize = total_used / total_objects;
+	avg_objwaste = total_objwaste / total_objects;
+	avg_partobj = total_partobj * 100 / total_objects;
+	avg_memobj = total_objsize / total_objects;
+
+	printf("Slabcache Totals\n");
+	printf("----------------\n");
+	printf("Slabcaches : %3d      Aliases  : %3d->%-3d Active: %3d\n",
+			slabs, aliases, alias_targets, used_slabs);
+
+	store_size(b1, total_size);store_size(b2, total_waste);
+	store_size(b3, total_waste * 100 / total_used);
+	printf("Memory used: %6s   # Loss   : %6s   MRatio: %6s%%\n", b1, b2, b3);
+
+	store_size(b1, total_objects);store_size(b2, total_partobj);
+	store_size(b3, total_partobj * 100 / total_objects);
+	printf("# Objects  : %6s   # PartObj: %6s   ORatio: %6s%%\n", b1, b2, b3);
+
+	printf("\n");
+	printf("Per Cache    Average         Min         Max       Total\n");
+	printf("---------------------------------------------------------\n");
+
+	store_size(b1, avg_objects);store_size(b2, min_objects);
+	store_size(b3, max_objects);store_size(b4, total_objects);
+	printf("#Objects  %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_slabs);store_size(b2, min_slabs);
+	store_size(b3, max_slabs);store_size(b4, total_slabs);
+	printf("#Slabs    %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_partial);store_size(b2, min_partial);
+	store_size(b3, max_partial);store_size(b4, total_partial);
+	printf("#PartSlab %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+	store_size(b1, avg_ppart);store_size(b2, min_ppart);
+	store_size(b3, max_ppart);
+	store_size(b4, total_partial * 100  / total_slabs);
+	printf("%%PartSlab %10s%% %10s%% %10s%% %10s%%\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_partobj);store_size(b2, min_partobj);
+	store_size(b3, max_partobj);
+	store_size(b4, total_partobj);
+	printf("PartObjs  %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj);
+	store_size(b3, max_ppartobj);
+	store_size(b4, total_partobj * 100 / total_objects);
+	printf("%% PartObj %10s%% %10s%% %10s%% %10s%%\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_size);store_size(b2, min_size);
+	store_size(b3, max_size);store_size(b4, total_size);
+	printf("Memory    %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_used);store_size(b2, min_used);
+	store_size(b3, max_used);store_size(b4, total_used);
+	printf("Used      %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_waste);store_size(b2, min_waste);
+	store_size(b3, max_waste);store_size(b4, total_waste);
+	printf("Loss      %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	printf("\n");
+	printf("Per Object   Average         Min         Max\n");
+	printf("---------------------------------------------\n");
+
+	store_size(b1, avg_memobj);store_size(b2, min_memobj);
+	store_size(b3, max_memobj);
+	printf("Memory    %10s  %10s  %10s\n",
+			b1,	b2,	b3);
+	store_size(b1, avg_objsize);store_size(b2, min_objsize);
+	store_size(b3, max_objsize);
+	printf("User      %10s  %10s  %10s\n",
+			b1,	b2,	b3);
+
+	store_size(b1, avg_objwaste);store_size(b2, min_objwaste);
+	store_size(b3, max_objwaste);
+	printf("Loss      %10s  %10s  %10s\n",
+			b1,	b2,	b3);
+}
+
+void sort_slabs(void)
+{
+	struct slabinfo *s1,*s2;
+
+	for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) {
+		for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
+			int result;
+
+			if (sort_size)
+				result = slab_size(s1) < slab_size(s2);
+			else
+				result = strcasecmp(s1->name, s2->name);
+
+			if (show_inverted)
+				result = -result;
+
+			if (result > 0) {
+				struct slabinfo t;
+
+				memcpy(&t, s1, sizeof(struct slabinfo));
+				memcpy(s1, s2, sizeof(struct slabinfo));
+				memcpy(s2, &t, sizeof(struct slabinfo));
+			}
+		}
+	}
+}
+
+void sort_aliases(void)
+{
+	struct aliasinfo *a1,*a2;
+
+	for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) {
+		for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) {
+			char *n1, *n2;
+
+			n1 = a1->name;
+			n2 = a2->name;
+			if (show_alias && !show_inverted) {
+				n1 = a1->ref;
+				n2 = a2->ref;
+			}
+			if (strcasecmp(n1, n2) > 0) {
+				struct aliasinfo t;
+
+				memcpy(&t, a1, sizeof(struct aliasinfo));
+				memcpy(a1, a2, sizeof(struct aliasinfo));
+				memcpy(a2, &t, sizeof(struct aliasinfo));
+			}
+		}
+	}
+}
+
+void link_slabs(void)
+{
+	struct aliasinfo *a;
+	struct slabinfo *s;
+
+	for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+		for(s = slabinfo; s < slabinfo + slabs; s++)
+			if (strcmp(a->ref, s->name) == 0) {
+				a->slab = s;
+				s->refs++;
+				break;
+			}
+		if (s == slabinfo + slabs)
+			fatal("Unresolved alias %s\n", a->ref);
+	}
+}
+
+void alias(void)
+{
+	struct aliasinfo *a;
+	char *active = NULL;
+
+	sort_aliases();
+	link_slabs();
+
+	for(a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+		if (!show_single_ref && a->slab->refs == 1)
+			continue;
+
+		if (!show_inverted) {
+			if (active) {
+				if (strcmp(a->slab->name, active) == 0) {
+					printf(" %s", a->name);
+					continue;
+				}
+			}
+			printf("\n%-20s <- %s", a->slab->name, a->name);
+			active = a->slab->name;
+		}
+		else
+			printf("%-20s -> %s\n", a->name, a->slab->name);
+	}
+	if (active)
+		printf("\n");
+}
+
+
+void rename_slabs(void)
+{
+	struct slabinfo *s;
+	struct aliasinfo *a;
+
+	for (s = slabinfo; s < slabinfo + slabs; s++) {
+		if (*s->name != ':')
+			continue;
+
+		if (s->refs > 1 && !show_first_alias)
+			continue;
+
+		a = find_one_alias(s);
+
+		s->name = a->name;
+	}
+}
+
+int slab_mismatch(char *slab)
+{
+	return regexec(&pattern, slab, 0, NULL, 0);
+}
+
+void read_slab_dir(void)
+{
+	DIR *dir;
+	struct dirent *de;
+	struct slabinfo *slab = slabinfo;
+	struct aliasinfo *alias = aliasinfo;
+	char *p;
+	char *t;
+	int count;
+
+	dir = opendir(".");
+	while ((de = readdir(dir))) {
+		if (de->d_name[0] == '.' ||
+				slab_mismatch(de->d_name))
+			continue;
+		switch (de->d_type) {
+		   case DT_LNK:
+		   	alias->name = strdup(de->d_name);
+			count = readlink(de->d_name, buffer, sizeof(buffer));
+
+			if (count < 0)
+				fatal("Cannot read symlink %s\n", de->d_name);
+
+			buffer[count] = 0;
+			p = buffer + count;
+			while (p > buffer && p[-1] != '/')
+				p--;
+			alias->ref = strdup(p);
+			alias++;
+			break;
+		   case DT_DIR:
+			if (chdir(de->d_name))
+				fatal("Unable to access slab %s\n", slab->name);
+		   	slab->name = strdup(de->d_name);
+			slab->alias = 0;
+			slab->refs = 0;
+			slab->aliases = get_obj("aliases");
+			slab->align = get_obj("align");
+			slab->cache_dma = get_obj("cache_dma");
+			slab->cpu_slabs = get_obj("cpu_slabs");
+			slab->destroy_by_rcu = get_obj("destroy_by_rcu");
+			slab->hwcache_align = get_obj("hwcache_align");
+			slab->object_size = get_obj("object_size");
+			slab->objects = get_obj("objects");
+			slab->objs_per_slab = get_obj("objs_per_slab");
+			slab->order = get_obj("order");
+			slab->partial = get_obj("partial");
+			slab->partial = get_obj_and_str("partial", &t);
+			decode_numa_list(slab->numa_partial, t);
+			slab->poison = get_obj("poison");
+			slab->reclaim_account = get_obj("reclaim_account");
+			slab->red_zone = get_obj("red_zone");
+			slab->sanity_checks = get_obj("sanity_checks");
+			slab->slab_size = get_obj("slab_size");
+			slab->slabs = get_obj_and_str("slabs", &t);
+			decode_numa_list(slab->numa, t);
+			slab->store_user = get_obj("store_user");
+			slab->trace = get_obj("trace");
+			chdir("..");
+			if (slab->name[0] == ':')
+				alias_targets++;
+			slab++;
+			break;
+		   default :
+			fatal("Unknown file type %lx\n", de->d_type);
+		}
+	}
+	closedir(dir);
+	slabs = slab - slabinfo;
+	aliases = alias - aliasinfo;
+	if (slabs > MAX_SLABS)
+		fatal("Too many slabs\n");
+	if (aliases > MAX_ALIASES)
+		fatal("Too many aliases\n");
+}
+
+void output_slabs(void)
+{
+	struct slabinfo *slab;
+
+	for (slab = slabinfo; slab < slabinfo + slabs; slab++) {
+
+		if (slab->alias)
+			continue;
+
+
+		if (show_numa)
+			slab_numa(slab);
+		else
+		if (show_track)
+			show_tracking(slab);
+		else
+		if (validate)
+			slab_validate(slab);
+		else
+		if (shrink)
+			slab_shrink(slab);
+		else {
+			if (show_slab)
+				slabcache(slab);
+		}
+	}
+}
+
+struct option opts[] = {
+	{ "aliases", 0, NULL, 'a' },
+	{ "slabs", 0, NULL, 'l' },
+	{ "numa", 0, NULL, 'n' },
+	{ "zero", 0, NULL, 'z' },
+	{ "help", 0, NULL, 'h' },
+	{ "validate", 0, NULL, 'v' },
+	{ "first-alias", 0, NULL, 'f' },
+	{ "shrink", 0, NULL, 's' },
+	{ "track", 0, NULL, 't'},
+	{ "inverted", 0, NULL, 'i'},
+	{ "1ref", 0, NULL, '1'},
+	{ NULL, 0, NULL, 0 }
+};
+
+int main(int argc, char *argv[])
+{
+	int c;
+	int err;
+	char *pattern_source;
+
+	page_size = getpagesize();
+	if (chdir("/sys/slab"))
+		fatal("This kernel does not have SLUB support.\n");
+
+	while ((c = getopt_long(argc, argv, "afhil1npstvzTS", opts, NULL)) != -1)
+	switch(c) {
+		case '1':
+			show_single_ref = 1;
+			break;
+		case 'a':
+			show_alias = 1;
+			break;
+		case 'f':
+			show_first_alias = 1;
+			break;
+		case 'h':
+			usage();
+			return 0;
+		case 'i':
+			show_inverted = 1;
+			break;
+		case 'n':
+			show_numa = 1;
+			break;
+		case 's':
+			shrink = 1;
+			break;
+		case 'l':
+			show_slab = 1;
+			break;
+		case 't':
+			show_track = 1;
+			break;
+		case 'v':
+			validate = 1;
+			break;
+		case 'z':
+			skip_zero = 0;
+			break;
+		case 'T':
+			show_totals = 1;
+			break;
+		case 'S':
+			sort_size = 1;
+			break;
+
+		default:
+			fatal("%s: Invalid option '%c'\n", argv[0], optopt);
+
+	}
+
+	if (!show_slab && !show_alias && !show_track
+		&& !validate && !shrink)
+			show_slab = 1;
+
+	if (argc > optind)
+		pattern_source = argv[optind];
+	else
+		pattern_source = ".*";
+
+	err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
+	if (err)
+		fatal("%s: Invalid pattern '%s' code %d\n",
+			argv[0], pattern_source, err);
+	read_slab_dir();
+	if (show_alias)
+		alias();
+	else
+	if (show_totals)
+		totals();
+	else {
+		link_slabs();
+		rename_slabs();
+		sort_slabs();
+		output_slabs();
+	}
+	return 0;
+}
-- 
cgit v1.2.3


From 2b744c01a54fe0c9974ff1b29522f25f07084053 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Sun, 6 May 2007 14:49:59 -0700
Subject: mm: fix handling of panic_on_oom when cpusets are in use

The current panic_on_oom may not work if there is a process using
cpusets/mempolicy, because other nodes' memory may remain.  But some people
want failover by panic ASAP even if they are used.  This patch makes new
setting for its request.

This is tested on my ia64 box which has 3 nodes.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Ethan Solomita <solo@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/vm.txt | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index e96a341eb7e..1d192565e18 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -197,11 +197,22 @@ and may not be fast.
 
 panic_on_oom
 
-This enables or disables panic on out-of-memory feature.  If this is set to 1,
-the kernel panics when out-of-memory happens.  If this is set to 0, the kernel
-will kill some rogue process, called oom_killer.  Usually, oom_killer can kill
-rogue processes and system will survive.  If you want to panic the system
-rather than killing rogue processes, set this to 1.
+This enables or disables panic on out-of-memory feature.
 
-The default value is 0.
+If this is set to 0, the kernel will kill some rogue process,
+called oom_killer.  Usually, oom_killer can kill rogue processes and
+system will survive.
+
+If this is set to 1, the kernel panics when out-of-memory happens.
+However, if a process limits using nodes by mempolicy/cpusets,
+and those nodes become memory exhaustion status, one process
+may be killed by oom-killer. No panic occurs in this case.
+Because other nodes' memory may be free. This means system total status
+may be not fatal yet.
 
+If this is set to 2, the kernel panics compulsorily even on the
+above-mentioned.
+
+The default value is 0.
+1 and 2 are for failover of clustering. Please select either
+according to your policy of failover.
-- 
cgit v1.2.3


From 1394f03221790a988afc3e4b3cb79f2e477246a9 Mon Sep 17 00:00:00 2001
From: Bryan Wu <bryan.wu@analog.com>
Date: Sun, 6 May 2007 14:50:22 -0700
Subject: blackfin architecture

This adds support for the Analog Devices Blackfin processor architecture, and
currently supports the BF533, BF532, BF531, BF537, BF536, BF534, and BF561
(Dual Core) devices, with a variety of development platforms including those
avaliable from Analog Devices (BF533-EZKit, BF533-STAMP, BF537-STAMP,
BF561-EZKIT), and Bluetechnix!  Tinyboards.

The Blackfin architecture was jointly developed by Intel and Analog Devices
Inc.  (ADI) as the Micro Signal Architecture (MSA) core and introduced it in
December of 2000.  Since then ADI has put this core into its Blackfin
processor family of devices.  The Blackfin core has the advantages of a clean,
orthogonal,RISC-like microprocessor instruction set.  It combines a dual-MAC
(Multiply/Accumulate), state-of-the-art signal processing engine and
single-instruction, multiple-data (SIMD) multimedia capabilities into a single
instruction-set architecture.

The Blackfin architecture, including the instruction set, is described by the
ADSP-BF53x/BF56x Blackfin Processor Programming Reference
http://blackfin.uclinux.org/gf/download/frsrelease/29/2549/Blackfin_PRM.pdf

The Blackfin processor is already supported by major releases of gcc, and
there are binary and source rpms/tarballs for many architectures at:
http://blackfin.uclinux.org/gf/project/toolchain/frs There is complete
documentation, including "getting started" guides available at:
http://docs.blackfin.uclinux.org/ which provides links to the sources and
patches you will need in order to set up a cross-compiling environment for
bfin-linux-uclibc

This patch, as well as the other patches (toolchain, distribution,
uClibc) are actively supported by Analog Devices Inc, at:
http://blackfin.uclinux.org/

We have tested this on LTP, and our test plan (including pass/fails) can
be found at:
http://docs.blackfin.uclinux.org/doku.php?id=testing_the_linux_kernel

[m.kozlowski@tuxland.pl: balance parenthesis in blackfin header files]
Signed-off-by: Bryan Wu <bryan.wu@analog.com>
Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Signed-off-by: Aubrey Li <aubrey.li@analog.com>
Signed-off-by: Jie Zhang <jie.zhang@analog.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/blackfin/00-INDEX          |  11 ++
 Documentation/blackfin/Filesystems       | 169 +++++++++++++++++++++++++++++++
 Documentation/blackfin/cache-lock.txt    |  48 +++++++++
 Documentation/blackfin/cachefeatures.txt |  65 ++++++++++++
 4 files changed, 293 insertions(+)
 create mode 100644 Documentation/blackfin/00-INDEX
 create mode 100644 Documentation/blackfin/Filesystems
 create mode 100644 Documentation/blackfin/cache-lock.txt
 create mode 100644 Documentation/blackfin/cachefeatures.txt

(limited to 'Documentation')

diff --git a/Documentation/blackfin/00-INDEX b/Documentation/blackfin/00-INDEX
new file mode 100644
index 00000000000..7cb3b356b24
--- /dev/null
+++ b/Documentation/blackfin/00-INDEX
@@ -0,0 +1,11 @@
+00-INDEX
+	- This file
+
+cache-lock.txt
+	- HOWTO for blackfin cache locking.
+
+cachefeatures.txt
+	- Supported cache features.
+
+Filesystems
+	- Requirements for mounting the root file system.
diff --git a/Documentation/blackfin/Filesystems b/Documentation/blackfin/Filesystems
new file mode 100644
index 00000000000..51260a1b803
--- /dev/null
+++ b/Documentation/blackfin/Filesystems
@@ -0,0 +1,169 @@
+/*
+ * File:         Documentation/blackfin/Filesystems
+ * Based on:
+ * Author:
+ *
+ * Created:
+ * Description:  This file contains the simple DMA Implementation for Blackfin
+ *
+ * Rev:          $Id: Filesystems 2384 2006-11-01 04:12:43Z magicyang $
+ *
+ * Modified:
+ *               Copyright 2004-2006 Analog Devices Inc.
+ *
+ * Bugs:         Enter bugs at http://blackfin.uclinux.org/
+ *
+ */
+
+		How to mount the root file system in uClinux/Blackfin
+		-----------------------------------------------------
+
+1	Mounting EXT3 File system.
+	------------------------
+
+	Creating an EXT3 File system for uClinux/Blackfin:
+
+
+Please follow the steps to form the EXT3 File system and mount the same as root
+file system.
+
+a	Make an ext3 file system as large as you want the final root file
+	system.
+
+		mkfs.ext3  /dev/ram0 <your-rootfs-size-in-1k-blocks>
+
+b	Mount this Empty file system on a free directory as:
+
+		mount -t ext3 /dev/ram0  ./test
+			where ./test is the empty directory.
+
+c	Copy your root fs directory that you have so carefully made over.
+
+		cp -af  /tmp/my_final_rootfs_files/* ./test
+
+		(For ex: cp -af uClinux-dist/romfs/* ./test)
+
+d	If you have done everything right till now you should be able to see
+	the required "root" dir's (that's etc, root, bin, lib, sbin...)
+
+e	Now unmount the file system
+
+		umount  ./test
+
+f	Create the root file system image.
+
+		dd if=/dev/ram0 bs=1k count=<your-rootfs-size-in-1k-blocks> \
+		> ext3fs.img
+
+
+Now you have to tell the kernel that will be mounting this file system as
+rootfs.
+So do a make menuconfig under kernel and select the Ext3 journaling file system
+support under File system --> submenu.
+
+
+2.	Mounting EXT2 File system.
+	-------------------------
+
+By default the ext2 file system image will be created if you invoke make from
+the top uClinux-dist directory.
+
+
+3.	Mounting CRAMFS File System
+	----------------------------
+
+To create a CRAMFS file system image execute the command
+
+	mkfs.cramfs ./test cramfs.img
+
+	where ./test is the target directory.
+
+
+4.	Mounting ROMFS File System
+	--------------------------
+
+To create a ROMFS file system image execute the command
+
+	genromfs -v -V "ROMdisk" -f romfs.img -d ./test
+
+	where ./test is the target directory
+
+
+5.	Mounting the JFFS2 Filesystem
+	-----------------------------
+
+To create a compressed JFFS filesystem (JFFS2), please execute the command
+
+	mkfs.jffs2 -d ./test -o jffs2.img
+
+	where ./test is the target directory.
+
+However, please make sure the following is in your kernel config.
+
+/*
+ * RAM/ROM/Flash chip drivers
+ */
+#define CONFIG_MTD_CFI 1
+#define CONFIG_MTD_ROM 1
+/*
+ * Mapping drivers for chip access
+ */
+#define CONFIG_MTD_COMPLEX_MAPPINGS 1
+#define CONFIG_MTD_BF533 1
+#undef CONFIG_MTD_UCLINUX
+
+Through the u-boot boot loader, use the jffs2.img in the corresponding
+partition made in linux-2.6.x/drivers/mtd/maps/bf533_flash.c.
+
+NOTE - 	Currently the Flash driver is available only for EZKIT. Watch out for a
+	STAMP driver soon.
+
+
+6. 	Mounting the NFS File system
+	-----------------------------
+
+	For mounting the NFS please do the following in the kernel config.
+
+	In Networking Support --> Networking options --> TCP/IP networking -->
+		IP: kernel level autoconfiguration
+
+	Enable BOOTP Support.
+
+	In Kernel hacking --> Compiled-in kernel boot parameter add the following
+
+		root=/dev/nfs rw ip=bootp
+
+	In File system --> Network File system, Enable
+
+		NFS file system support --> NFSv3 client support
+		Root File system on NFS
+
+	in uClibc menuconfig, do the following
+	In Networking Support
+		enable Remote Procedure Call (RPC) support
+			Full RPC Support
+
+	On the Host side, ensure that /etc/dhcpd.conf looks something like this
+
+		ddns-update-style ad-hoc;
+		allow bootp;
+		subnet 10.100.4.0 netmask 255.255.255.0 {
+		default-lease-time 122209600;
+		max-lease-time 31557600;
+		group {
+			host bf533 {
+				hardware ethernet 00:CF:52:49:C3:01;
+				fixed-address 10.100.4.50;
+				option root-path "/home/nfsmount";
+			}
+		}
+
+	ensure that /etc/exports looks something like this
+		/home/nfsmount *(rw,no_root_squash,no_all_squash)
+
+	 run the following commands as root (may differ depending on your
+	 distribution) :
+		-  service nfs start
+		-  service portmap start
+		-  service dhcpd start
+		-  /usr/sbin/exportfs
diff --git a/Documentation/blackfin/cache-lock.txt b/Documentation/blackfin/cache-lock.txt
new file mode 100644
index 00000000000..88ba1e6c31c
--- /dev/null
+++ b/Documentation/blackfin/cache-lock.txt
@@ -0,0 +1,48 @@
+/*
+ * File:         Documentation/blackfin/cache-lock.txt
+ * Based on:
+ * Author:
+ *
+ * Created:
+ * Description:  This file contains the simple DMA Implementation for Blackfin
+ *
+ * Rev:          $Id: cache-lock.txt 2384 2006-11-01 04:12:43Z magicyang $
+ *
+ * Modified:
+ *               Copyright 2004-2006 Analog Devices Inc.
+ *
+ * Bugs:         Enter bugs at http://blackfin.uclinux.org/
+ *
+ */
+
+How to lock your code in cache in uClinux/blackfin
+--------------------------------------------------
+
+There are only a few steps required to lock your code into the cache.
+Currently you can lock the code by Way.
+
+Below are the interface provided for locking the cache.
+
+
+1. cache_grab_lock(int Ways);
+
+This function grab the lock for locking your code into the cache specified
+by Ways.
+
+
+2. cache_lock(int Ways);
+
+This function should be called after your critical code has been executed.
+Once the critical code exits, the code is now loaded into the cache. This
+function locks the code into the cache.
+
+
+So, the example sequence will be:
+
+	cache_grab_lock(WAY0_L);	/* Grab the lock */
+
+	critical_code();		/* Execute the code of interest */
+
+	cache_lock(WAY0_L);		/* Lock the cache */
+
+Where WAY0_L signifies WAY0 locking.
diff --git a/Documentation/blackfin/cachefeatures.txt b/Documentation/blackfin/cachefeatures.txt
new file mode 100644
index 00000000000..0fbec23becb
--- /dev/null
+++ b/Documentation/blackfin/cachefeatures.txt
@@ -0,0 +1,65 @@
+/*
+ * File:         Documentation/blackfin/cachefeatures.txt
+ * Based on:
+ * Author:
+ *
+ * Created:
+ * Description:  This file contains the simple DMA Implementation for Blackfin
+ *
+ * Rev:          $Id: cachefeatures.txt 2384 2006-11-01 04:12:43Z magicyang $
+ *
+ * Modified:
+ *               Copyright 2004-2006 Analog Devices Inc.
+ *
+ * Bugs:         Enter bugs at http://blackfin.uclinux.org/
+ *
+ */
+
+	- Instruction and Data cache initialization.
+		icache_init();
+		dcache_init();
+
+	-  Instruction and Data cache Invalidation Routines, when flushing the
+	   same is not required.
+		_icache_invalidate();
+		_dcache_invalidate();
+
+	Also, for invalidating the entire instruction and data cache, the below
+	routines are provided (another method for invalidation, refer page no 267 and 287 of
+	ADSP-BF533 Hardware Reference manual)
+
+		invalidate_entire_dcache();
+		invalidate_entire_icache();
+
+	-External Flushing of Instruction and data cache routines.
+
+		flush_instruction_cache();
+		flush_data_cache();
+
+	- Internal Flushing of Instruction and Data Cache.
+
+		icplb_flush();
+		dcplb_flush();
+
+	- Locking the cache.
+
+		cache_grab_lock();
+		cache_lock();
+
+	Please refer linux-2.6.x/Documentation/blackfin/cache-lock.txt for how to
+	lock the cache.
+
+	Locking the cache is optional feature.
+
+	- Miscellaneous cache functions.
+
+		flush_cache_all();
+		flush_cache_mm();
+		invalidate_dcache_range();
+		flush_dcache_range();
+		flush_dcache_page();
+		flush_cache_range();
+		flush_cache_page();
+		invalidate_dcache_range();
+		flush_page_to_ram();
+
-- 
cgit v1.2.3


From f0ced9b229cfbc76b5db9837b4b256b602d56610 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sun, 6 May 2007 14:50:50 -0700
Subject: power management: change /sys/power/disk display

Change /sys/power/disk to display all valid modes as well as the currently
selected one in a fashion known from the LED subsystem.

This changes userspace API, but it is apparently not used much (we asked
some userspace developers)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/power/interface.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index 8c5b41bf3f3..fd5192a8fa8 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -34,8 +34,12 @@ for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs.  Then,
 we are able to look in the log messages and work out, for example, which code
 is being slow and which device drivers are misbehaving.
 
-Reading from this file will display what the mode is currently set
-to. Writing to this file will accept one of
+Reading from this file will display all supported modes and the currently
+selected one in brackets, for example
+
+	[shutdown] reboot test testproc
+
+Writing to this file will accept one of
 
        'platform' (only if the platform supports it)
        'shutdown'
-- 
cgit v1.2.3