From bf6285278418f1dc6f07296bbb286da0bfe26d5d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 8 Dec 2006 16:14:22 -0800 Subject: [IA64] Itanium MC Error Injection Tool: Doc and sample application This patch contains documentation and a sample application. Since the sample application has ~1000 lines of code, it might not be suitable for the kernel documentation in the kernel tree. If you think this is not a good place to hold the sample application, please let me know and I'm open to other choices e.g. sourceforge etc. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- Documentation/ia64/err_inject.txt | 1068 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1068 insertions(+) create mode 100644 Documentation/ia64/err_inject.txt (limited to 'Documentation') diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt new file mode 100644 index 00000000000..26487c172cf --- /dev/null +++ b/Documentation/ia64/err_inject.txt @@ -0,0 +1,1068 @@ + +IPF Machine Check (MC) error inject tool +======================================== + +IPF Machine Check (MC) error inject tool is used to inject MC +errors from Linux. The tool is a test bed for IPF MC work flow including +hardware correctable error handling, OS recoverable error handling, MC +event logging, etc. + +The tool includes two parts: a kernel driver and a user application +sample. The driver provides an interface to PAL to inject errors +and query error injection capabilities. The driver code is in +arch/ia64/kernel/err_inject.c. The application sample (shown below) +provides a combination of various errors and calls the driver's interface +(sysfs interface) to inject errors or query error injection capabilities. + +The tool can be used to test Intel IPF machine MC handling capabilities. +It's especially useful for people who can not access a hardware MC injection +tool to inject errors. It's also very useful to integrate with other +software test suites to do stress testing on IPF. 
+ +Below is a sample application as part of the whole tool. The sample +can be used as a working test tool. Or it can be expanded to include +more features. It can also be integrated into a library or other user +application for more thorough testing. + +The sample application takes err.conf as error configuration input. Gcc +compiles the code. After you install the err_inject driver, you can run +this sample application to inject errors. + +Errata: Itanium 2 Processors Specification Update lists some errata against +the pal_mc_error_inject PAL procedure. The following err.conf has been tested +on latest Montecito PAL. + +err.conf: + +#This is the configuration file for err_inject_tool. +#The format of each line is: +#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer +#where +# cpu: logical cpu number the error will be injected on. +# loop: times the error will be injected. +# interval: In seconds. every so often one error is injected. +# err_type_info, err_struct_info: PAL parameters. +# +#Note: All values are hex w/o or w/ 0x prefix. + + +#On cpu2, inject only total 0x10 errors, interval 5 seconds +#corrected, data cache, hier-2, physical addr(assigned by tool code). +#working on Montecito latest PAL. +2, 10, 5, 4101, 95 + +#On cpu4, inject and consume total 0x10 errors, interval 5 seconds +#corrected, data cache, hier-2, physical addr(assigned by tool code). +#working on Montecito latest PAL. +4, 10, 5, 4109, 95 + +#On cpu15, inject and consume total 0x10 errors, interval 5 seconds +#recoverable, DTR0, hier-2. +#working on Montecito latest PAL. +0xf, 0x10, 5, 4249, 15 + +The sample application source code: + +err_injection_tool.c: + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Copyright (C) 2006 Intel Co + * Fenghua Yu + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_FN_SIZE 256 +#define MAX_BUF_SIZE 256 +#define DATA_BUF_SIZE 256 +#define NR_CPUS 512 +#define MAX_TASK_NUM 2048 +#define MIN_INTERVAL 5 // seconds +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. +#define PARA_FIELD_NUM 5 +#define MASK_SIZE (NR_CPUS/64) +#define PATH_FORMAT "/sys/devices/system/node/node0/cpu%d/err_inject/" + +int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); + +int verbose; +#define vbprintf if (verbose) printf + +int log_info(int cpu, const char *fmt, ...) 
+{
+	/*
+	 * Print a printf-style message on stdout and append the same text to
+	 * the file "<cpu>.log" in the current directory.
+	 * Returns 0 on success, -1 if the log file cannot be opened.
+	 */
+	FILE *log;
+	char fn[MAX_FN_SIZE];
+	char buf[MAX_BUF_SIZE];
+	va_list args;
+	va_list args_file;
+
+	sprintf(fn, "%d.log", cpu);
+	log=fopen(fn, "a+");
+	if (log==NULL) {
+		perror("Error open:");
+		return -1;
+	}
+
+	/*
+	 * The message is formatted twice. A va_list must not be reused after
+	 * a v*printf() call has consumed it, so the second pass gets a copy.
+	 */
+	va_start(args, fmt);
+	va_copy(args_file, args);
+	vprintf(fmt, args);
+	va_end(args);
+
+	/* Bounded formatting: an over-long message is truncated, not overrun. */
+	vsnprintf(buf, sizeof(buf), fmt, args_file);
+	va_end(args_file);
+
+	/* Append only the text itself, not the whole NUL-padded buffer. */
+	fputs(buf, log);
+	fclose(log);
+
+	return 0;
+}
+
+typedef unsigned long u64;
+typedef unsigned int u32;
+
+typedef union err_type_info_u {
+	struct {
+		u64 mode        : 3,  /* 0-2 */
+		    err_inj     : 3,  /* 3-5 */
+		    err_sev     : 2,  /* 6-7 */
+		    err_struct  : 5,  /* 8-12 */
+		    struct_hier : 3,  /* 13-15 */
+		    reserved    : 48; /* 16-63 */
+	} err_type_info_u;
+	u64 err_type_info;
+} err_type_info_t;
+
+typedef union err_struct_info_u {
+	struct {
+		u64 siv        : 1,  /* 0 */
+		    c_t        : 2,  /* 1-2 */
+		    cl_p       : 3,  /* 3-5 */
+		    cl_id      : 3,  /* 6-8 */
+		    cl_dp      : 1,  /* 9 */
+		    reserved1  : 22, /* 10-31 */
+		    tiv        : 1,  /* 32 */
+		    trigger    : 4,  /* 33-36 */
+		    trigger_pl : 3,  /* 37-39 */
+		    reserved2  : 24; /* 40-63 */
+	} err_struct_info_cache;
+	struct {
+		u64 siv        : 1,  /* 0 */
+		    tt         : 2,  /* 1-2 */
+		    tc_tr      : 2,  /* 3-4 */
+		    tr_slot    : 8,  /* 5-12 */
+		    reserved1  : 19, /* 13-31 */
+		    tiv        : 1,  /* 32 */
+		    trigger    : 4,  /* 33-36 */
+		    trigger_pl : 3,  /* 37-39 */
+		    reserved2  : 24; /* 40-63 */
+	} err_struct_info_tlb;
+	struct {
+		u64 siv        : 1,  /* 0 */
+		    regfile_id : 4,  /* 1-4 */
+		    reg_num    : 7,  /* 5-11 */
+		    reserved1  : 20, /* 12-31 */
+		    tiv        : 1,  /* 32 */
+		    trigger    : 4,  /* 33-36 */
+		    trigger_pl : 3,  /* 37-39 */
+		    reserved2  : 24; /* 40-63 */
+	} err_struct_info_register;
+	struct {
+		u64 reserved;
+	} err_struct_info_bus_processor_interconnect;
+	u64 err_struct_info;
+} err_struct_info_t;
+
+typedef union err_data_buffer_u {
+	struct {
+		u64 trigger_addr;    /* 0-63 */
+		u64 inj_addr;        /* 64-127 */
+		u64 way        : 5,  /* 128-132 */
+
index : 20, /* 133-152 */ + reserved : 39; /* 153-191 */ + } err_data_buffer_tlb; + struct { + u64 trigger_addr; /* 0-63 */ + } err_data_buffer_register; + struct { + u64 reserved; /* 0-63 */ + } err_data_buffer_bus_processor_interconnect; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; +} err_data_buffer_t; + +typedef union capabilities_u { + struct { + u64 i : 1, + d : 1, + rv : 1, + tag : 1, + data : 1, + mesi : 1, + dp : 1, + reserved1 : 3, + pa : 1, + va : 1, + wi : 1, + reserved2 : 20, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_cache; + struct { + u64 d : 1, + i : 1, + rv : 1, + tc : 1, + tr : 1, + reserved1 : 27, + trigger : 1, + trigger_pl : 1, + reserved2 : 30; + } capabilities_tlb; + struct { + u64 gr_b0 : 1, + gr_b1 : 1, + fr : 1, + br : 1, + pr : 1, + ar : 1, + cr : 1, + rr : 1, + pkr : 1, + dbr : 1, + ibr : 1, + pmc : 1, + pmd : 1, + reserved1 : 3, + regnum : 1, + reserved2 : 15, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_register; + struct { + u64 reserved; + } capabilities_bus_processor_interconnect; +} capabilities_t; + +typedef struct resources_s { + u64 ibr0 : 1, + ibr2 : 1, + ibr4 : 1, + ibr6 : 1, + dbr0 : 1, + dbr2 : 1, + dbr4 : 1, + dbr6 : 1, + reserved : 48; +} resources_t; + + +long get_page_size(void) +{ + long page_size=sysconf(_SC_PAGESIZE); + return page_size; +} + +#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size()) +#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS) +#define SHM_VA 0x2000000100000000 + +int shmid; +void *shmaddr; + +int create_shm(void) +{ + key_t key; + char fn[MAX_FN_SIZE]; + + /* cpu0 is always existing */ + sprintf(fn, PATH_FORMAT, 0); + if ((key = ftok(fn, 's')) == -1) { + perror("ftok"); + return -1; + } + + shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT); + if (shmid == -1) { + if (errno==EEXIST) { + shmid = shmget(key, SHM_SIZE, 0); + if (shmid == -1) { + perror("shmget"); + return -1; + } + } + else { + perror("shmget"); + return -1; + } + } + vbprintf("shmid=%d", 
shmid); + + /* connect to the segment: */ + shmaddr = shmat(shmid, (void *)SHM_VA, 0); + if (shmaddr == (void*)-1) { + perror("shmat"); + return -1; + } + + memset(shmaddr, 0, SHM_SIZE); + mlock(shmaddr, SHM_SIZE); + + return 0; +} + +int free_shm() +{ + munlock(shmaddr, SHM_SIZE); + shmdt(shmaddr); + semctl(shmid, 0, IPC_RMID); + + return 0; +} + +#ifdef _SEM_SEMUN_UNDEFINED +union semun +{ + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; +}; +#endif + +u32 mode=1; /* 1: physical mode; 2: virtual mode. */ +int one_lock=1; +key_t key[NR_CPUS]; +int semid[NR_CPUS]; + +int create_sem(int cpu) +{ + union semun arg; + char fn[MAX_FN_SIZE]; + int sid; + + sprintf(fn, PATH_FORMAT, cpu); + sprintf(fn, "%s/%s", fn, "err_type_info"); + if ((key[cpu] = ftok(fn, 'e')) == -1) { + perror("ftok"); + return -1; + } + + if (semid[cpu]!=0) + return 0; + + /* clear old semaphore */ + if ((sid = semget(key[cpu], 1, 0)) != -1) + semctl(sid, 0, IPC_RMID); + + /* get one semaphore */ + if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) { + perror("semget"); + printf("Please remove semaphore with key=0x%lx, then run the tool.\n", + (u64)key[cpu]); + return -1; + } + + vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu, + (u64)key[cpu]); + /* initialize the semaphore to 1: */ + arg.val = 1; + if (semctl(semid[cpu], 0, SETVAL, arg) == -1) { + perror("semctl"); + return -1; + } + + return 0; +} + +static int lock(int cpu) +{ + struct sembuf lock; + + lock.sem_num = cpu; + lock.sem_op = 1; + semop(semid[cpu], &lock, 1); + + return 0; +} + +static int unlock(int cpu) +{ + struct sembuf unlock; + + unlock.sem_num = cpu; + unlock.sem_op = -1; + semop(semid[cpu], &unlock, 1); + + return 0; +} + +void free_sem(int cpu) +{ + semctl(semid[cpu], 0, IPC_RMID); +} + +int wr_multi(char *fn, unsigned long *data, int size) +{ + int fd; + char buf[MAX_BUF_SIZE]; + int ret; + + if (size==1) + sprintf(buf, "%lx", *data); + else if 
(size==3)
+		sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
+	else {
+		fprintf(stderr,"write to file with wrong size!\n");
+		return -1;
+	}
+
+	fd=open(fn, O_RDWR);
+	/* open() reports failure with -1; 0 is a valid descriptor. */
+	if (fd<0) {
+		perror("Error:");
+		return -1;
+	}
+	/* Write only the formatted text, not the uninitialised rest of buf. */
+	ret=write(fd, buf, strlen(buf));
+	close(fd);
+	return ret;
+}
+
+/* Write the single hex value @data to the file @fn; see wr_multi(). */
+int wr(char *fn, unsigned long data)
+{
+	return wr_multi(fn, &data, 1);
+}
+
+/*
+ * Read the file @fn and parse its leading text as a hex number into *data.
+ * Returns 0 on success, -1 if the file cannot be opened or read.
+ */
+int rd(char *fn, unsigned long *data)
+{
+	int fd;
+	char buf[MAX_BUF_SIZE];
+	ssize_t len;
+
+	fd=open(fn, O_RDONLY);
+	if (fd<0) {
+		perror("Error:");
+		return -1;
+	}
+	/* Leave room for the terminator that strtoul() relies on. */
+	len=read(fd, buf, sizeof(buf)-1);
+	if (len<0) {
+		perror("Error:");
+		close(fd);
+		return -1;
+	}
+	buf[len]='\0';
+	*data=strtoul(buf, NULL, 16);
+	close(fd);
+	return 0;
+}
+
+/*
+ * Read "<path>/status" into *status.
+ * Returns 0 on success, -1 on error.
+ */
+int rd_status(char *path, int *status)
+{
+	char fn[MAX_FN_SIZE];
+	u64 value;
+
+	sprintf(fn, "%s/status", path);
+	/*
+	 * rd() stores a full unsigned long. Read into a u64 and narrow
+	 * afterwards instead of writing past the caller's int.
+	 */
+	if (rd(fn, &value)<0) {
+		perror("status reading error.\n");
+		return -1;
+	}
+	*status=(int)value;
+
+	return 0;
+}
+
+int rd_capabilities(char *path, u64 *capabilities)
+{
+	char fn[MAX_FN_SIZE];
+	sprintf(fn, "%s/capabilities", path);
+	if (rd(fn, capabilities)<0) {
+		perror("capabilities reading error.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Read and print every err_inject attribute file under @path.
+ * Returns 0 on success, -1 as soon as one file cannot be read.
+ */
+int rd_all(char *path)
+{
+	unsigned long err_type_info, err_struct_info, err_data_buffer;
+	unsigned long status_raw;
+	int status;
+	unsigned long capabilities, resources;
+	char fn[MAX_FN_SIZE];
+
+	sprintf(fn, "%s/err_type_info", path);
+	if (rd(fn, &err_type_info)<0) {
+		perror("err_type_info reading error.\n");
+		return -1;
+	}
+	printf("err_type_info=%lx\n", err_type_info);
+
+	sprintf(fn, "%s/err_struct_info", path);
+	if (rd(fn, &err_struct_info)<0) {
+		perror("err_struct_info reading error.\n");
+		return -1;
+	}
+	printf("err_struct_info=%lx\n", err_struct_info);
+
+	sprintf(fn, "%s/err_data_buffer", path);
+	if (rd(fn, &err_data_buffer)<0) {
+		perror("err_data_buffer reading error.\n");
+		return -1;
+	}
+	printf("err_data_buffer=%lx\n", err_data_buffer);
+
+	/* Read the path just built, not the bare name "status". */
+	sprintf(fn, "%s/status", path);
+	if (rd(fn, &status_raw)<0) {
+		perror("status reading error.\n");
+		return -1;
+	}
+	status=(int)status_raw;
+	printf("status=%d\n", status);
+
+	sprintf(fn, "%s/capabilities",
path); + if (rd(fn,&capabilities)<0) { + perror("capabilities reading error.\n"); + return -1; + } + printf("capabilities=%lx\n", capabilities); + + sprintf(fn, "%s/resources", path); + if (rd(fn, &resources)<0) { + perror("resources reading error.\n"); + return -1; + } + printf("resources=%lx\n", resources); + + return 0; +} + +int query_capabilities(char *path, err_type_info_t err_type_info, + u64 *capabilities) +{ + char fn[MAX_FN_SIZE]; + err_struct_info_t err_struct_info; + err_data_buffer_t err_data_buffer; + + err_struct_info.err_struct_info=0; + memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8); + + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, 0x0); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. + sprintf(fn, "%s/call_start", path); + wr(fn, mode); + + if (rd_capabilities(path, capabilities)<0) + return -1; + + return 0; +} + +int query_all_capabilities() +{ + int status; + err_type_info_t err_type_info; + int err_sev, err_struct, struct_hier; + int cap=0; + u64 capabilities; + char path[MAX_FN_SIZE]; + + err_type_info.err_type_info=0; // Initial + err_type_info.err_type_info_u.mode=0; // Query mode; + err_type_info.err_type_info_u.err_inj=0; + + printf("All capabilities implemented in pal_mc_error_inject:\n"); + sprintf(path, PATH_FORMAT ,0); + for (err_sev=0;err_sev<3;err_sev++) + for (err_struct=0;err_struct<5;err_struct++) + for (struct_hier=0;struct_hier<5;struct_hier++) + { + status=-1; + capabilities=0; + err_type_info.err_type_info_u.err_sev=err_sev; + err_type_info.err_type_info_u.err_struct=err_struct; + err_type_info.err_type_info_u.struct_hier=struct_hier; + + if (query_capabilities(path, err_type_info, &capabilities)<0) + continue; + + if (rd_status(path, &status)<0) + continue; + + if (status==0) { + cap=1; + printf("For err_sev=%d, 
err_struct=%d, struct_hier=%d: ", + err_sev, err_struct, struct_hier); + printf("capabilities 0x%lx\n", capabilities); + } + } + if (!cap) { + printf("No capabilities supported.\n"); + return 0; + } + + return 0; +} + +int err_inject(int cpu, char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t err_data_buffer) +{ + int status; + char fn[MAX_FN_SIZE]; + + log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ", + err_type_info.err_type_info, + err_struct_info.err_struct_info); + log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n", + err_data_buffer.err_data_buffer[0], + err_data_buffer.err_data_buffer[1], + err_data_buffer.err_data_buffer[2]); + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, err_struct_info.err_struct_info); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. 
+ sprintf(fn, "%s/call_start", path); + wr(fn,mode); + + if (rd_status(path, &status)<0) { + vbprintf("fail: read status\n"); + return -100; + } + + if (status!=0) { + log_info(cpu, "fail: status=%d\n", status); + return status; + } + + return status; +} + +static int construct_data_buf(char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t *err_data_buffer, + void *va1) +{ + char fn[MAX_FN_SIZE]; + u64 virt_addr=0, phys_addr=0; + + vbprintf("va1=%lx\n", (u64)va1); + memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8); + + switch (err_type_info.err_type_info_u.err_struct) { + case 1: // Cache + switch (err_struct_info.err_struct_info_cache.cl_id) { + case 1: //Virtual addr + err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1; + break; + case 2: //Phys addr + sprintf(fn, "%s/virtual_to_phys", path); + virt_addr=(u64)va1; + if (wr(fn,virt_addr)<0) + return -1; + rd(fn, &phys_addr); + err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr; + break; + default: + printf("Not supported cl_id\n"); + break; + } + break; + case 2: // TLB + break; + case 3: // Register file + break; + case 4: // Bus/system interconnect + default: + printf("Not supported err_struct\n"); + break; + } + + return 0; +} + +typedef struct { + u64 cpu; + u64 loop; + u64 interval; + u64 err_type_info; + u64 err_struct_info; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; +} parameters_t; + +parameters_t line_para; +int para; + +static int empty_data_buffer(u64 *err_data_buffer) +{ + int empty=1; + int i; + + for (i=0;iMIN_INTERVAL + ?interval:MIN_INTERVAL; + parameters[num].err_type_info=err_type_info_conf; + parameters[num].err_struct_info=err_struct_info_conf; + memcpy(parameters[num++].err_data_buffer, + err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ; + + if (num>=MAX_TASK_NUM) + break; + } + } + else { + parameters[0].cpu=line_para.cpu; + parameters[0].loop=line_para.loop; + parameters[0].interval= 
line_para.interval>MIN_INTERVAL + ?line_para.interval:MIN_INTERVAL; + parameters[0].err_type_info=line_para.err_type_info; + parameters[0].err_struct_info=line_para.err_struct_info; + memcpy(parameters[0].err_data_buffer, + line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ; + + num=1; + } + + /* Create semaphore: If one_lock, one semaphore for all processors. + Otherwise, one sempaphore for each processor. */ + if (one_lock) { + if (create_sem(0)) { + printf("Can not create semaphore...exit\n"); + free_sem(0); + return -1; + } + } + else { + for (i=0;i Date: Mon, 5 Feb 2007 15:47:43 -0800 Subject: [IA64] Fix example error injection program Progam accessed using /sys/devices/system/node/node0/cpu%d/err_inject/ This path only exists for CONFIG_NUMA=y systems. Better to use /sys/devices/system/cpu/cpu%d/err_inject/ which is available on all systems. Signed-off-by: Tony Luck --- Documentation/ia64/err_inject.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt index 26487c172cf..6449a7090db 100644 --- a/Documentation/ia64/err_inject.txt +++ b/Documentation/ia64/err_inject.txt @@ -111,7 +111,7 @@ err_injection_tool.c: #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. #define PARA_FIELD_NUM 5 #define MASK_SIZE (NR_CPUS/64) -#define PATH_FORMAT "/sys/devices/system/node/node0/cpu%d/err_inject/" +#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/" int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); -- cgit v1.2.3 From ddd83eff58888928115b3e225a46d3c686e64594 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 30 Mar 2007 10:39:42 -0600 Subject: [IA64] update memory attribute aliasing documentation & test cases Updates documentation and adds some test cases. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Tony Luck --- Documentation/ia64/aliasing-test.c | 247 +++++++++++++++++++++++++++++++++++++ Documentation/ia64/aliasing.txt | 71 ++++++----- 2 files changed, 284 insertions(+), 34 deletions(-) create mode 100644 Documentation/ia64/aliasing-test.c (limited to 'Documentation') diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c new file mode 100644 index 00000000000..3153167b41c --- /dev/null +++ b/Documentation/ia64/aliasing-test.c @@ -0,0 +1,247 @@ +/* + * Exercise /dev/mem mmap cases that have been troublesome in the past + * + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int sum; + +int map_mem(char *path, off_t offset, size_t length, int touch) +{ + int fd, rc; + void *addr; + int *c; + + fd = open(path, O_RDWR); + if (fd == -1) { + perror(path); + return -1; + } + + addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset); + if (addr == MAP_FAILED) + return 1; + + if (touch) { + c = (int *) addr; + while (c < (int *) (offset + length)) + sum += *c++; + } + + rc = munmap(addr, length); + if (rc == -1) { + perror("munmap"); + return -1; + } + + close(fd); + return 0; +} + +int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch) +{ + struct dirent **namelist; + char *name, *path2; + int i, n, r, rc, result = 0; + struct stat buf; + + n = scandir(path, &namelist, 0, alphasort); + if (n < 0) { + perror("scandir"); + return -1; + } + + for (i = 0; i < n; i++) { + name = namelist[i]->d_name; + + if (fnmatch(".", name, 0) == 0) + goto skip; + if (fnmatch("..", name, 0) == 0) + goto skip; + + path2 = malloc(strlen(path) + 
strlen(name) + 3); + strcpy(path2, path); + strcat(path2, "/"); + strcat(path2, name); + + if (fnmatch(file, name, 0) == 0) { + rc = map_mem(path2, offset, length, touch); + if (rc == 0) + fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable"); + else if (rc > 0) + fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length); + else { + fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length); + return rc; + } + } else { + r = lstat(path2, &buf); + if (r == 0 && S_ISDIR(buf.st_mode)) { + rc = scan_sysfs(path2, file, offset, length, touch); + if (rc < 0) + return rc; + } + } + + result |= rc; + free(path2); + +skip: + free(namelist[i]); + } + free(namelist); + return rc; +} + +char buf[1024]; + +int read_rom(char *path) +{ + int fd, rc; + size_t size = 0; + + fd = open(path, O_RDWR); + if (fd == -1) { + perror(path); + return -1; + } + + rc = write(fd, "1", 2); + if (rc <= 0) { + perror("write"); + return -1; + } + + do { + rc = read(fd, buf, sizeof(buf)); + if (rc > 0) + size += rc; + } while (rc > 0); + + close(fd); + return size; +} + +int scan_rom(char *path, char *file) +{ + struct dirent **namelist; + char *name, *path2; + int i, n, r, rc, result = 0; + struct stat buf; + + n = scandir(path, &namelist, 0, alphasort); + if (n < 0) { + perror("scandir"); + return -1; + } + + for (i = 0; i < n; i++) { + name = namelist[i]->d_name; + + if (fnmatch(".", name, 0) == 0) + goto skip; + if (fnmatch("..", name, 0) == 0) + goto skip; + + path2 = malloc(strlen(path) + strlen(name) + 3); + strcpy(path2, path); + strcat(path2, "/"); + strcat(path2, name); + + if (fnmatch(file, name, 0) == 0) { + rc = read_rom(path2); + + /* + * It's OK if the ROM is unreadable. Maybe there + * is no ROM, or some other error ocurred. The + * important thing is that no MCA happened. 
+ */ + if (rc > 0) + fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc); + else { + fprintf(stderr, "PASS: %s not readable\n", path2); + return rc; + } + } else { + r = lstat(path2, &buf); + if (r == 0 && S_ISDIR(buf.st_mode)) { + rc = scan_rom(path2, file); + if (rc < 0) + return rc; + } + } + + result |= rc; + free(path2); + +skip: + free(namelist[i]); + } + free(namelist); + return rc; +} + +main() +{ + int rc; + + if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0) + fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n"); + + /* + * It's not safe to blindly read the VGA frame buffer. If you know + * how to poke the card the right way, it should respond, but it's + * not safe in general. Many machines, e.g., Intel chipsets, cover + * up a non-responding card by just returning -1, but others will + * report the failure as a machine check. + */ + if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0) + fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n"); + + if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0) + fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n"); + + /* + * Often you can map all the individual pieces above (0-0xA0000, + * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole + * thing at once. This is because the individual pieces use different + * attributes, and there's no single attribute supported over the + * whole region. 
+ */ + rc = map_mem("/dev/mem", 0, 1024*1024, 0); + if (rc == 0) + fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n"); + else if (rc > 0) + fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n"); + else + fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n"); + + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1); + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0); + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1); + scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0); + + scan_rom("/sys/devices", "rom"); +} diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt index 38f9a52d182..9a431a7d0f5 100644 --- a/Documentation/ia64/aliasing.txt +++ b/Documentation/ia64/aliasing.txt @@ -112,16 +112,6 @@ POTENTIAL ATTRIBUTE ALIASING CASES The /dev/mem mmap constraints apply. - However, since this is for mapping legacy MMIO space, WB access - does not make sense. This matters on machines without legacy - VGA support: these machines may have WB memory for the entire - first megabyte (or even the entire first granule). - - On these machines, we could mmap legacy_mem as WB, which would - be safe in terms of attribute aliasing, but X has no way of - knowing that it is accessing regular memory, not a frame buffer, - so the kernel should fail the mmap rather than doing it with WB. - read/write of /dev/mem This uses copy_from_user(), which implicitly uses a kernel @@ -138,14 +128,20 @@ POTENTIAL ATTRIBUTE ALIASING CASES ioremap() - This returns a kernel identity mapping for use inside the - kernel. + This returns a mapping for use inside the kernel. If the region is in kern_memmap, we should use the attribute - specified there. Otherwise, if the EFI memory map reports that - the entire granule supports WB, we should use that (granules - that are partially reserved or occupied by firmware do not appear - in kern_memmap). Otherwise, we should use a UC mapping. 
+ specified there. + + If the EFI memory map reports that the entire granule supports + WB, we should use that (granules that are partially reserved + or occupied by firmware do not appear in kern_memmap). + + If the granule contains non-WB memory, but we can cover the + region safely with kernel page table mappings, we can use + ioremap_page_range() as most other architectures do. + + Failing all of the above, we have to fall back to a UC mapping. PAST PROBLEM CASES @@ -158,7 +154,7 @@ PAST PROBLEM CASES succeed. It may create either WB or UC user mappings, depending on whether the region is in kern_memmap or the EFI memory map. - mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled + mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled See https://bugzilla.novell.com/show_bug.cgi?id=140858. @@ -171,28 +167,25 @@ PAST PROBLEM CASES so it is safe to use WB mappings. The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, - which will use a granule-sized UC mapping covering 0-0xFFFFF. This - granule covers some WB-only memory, but since UC is non-speculative, - the processor will never generate an uncacheable reference to the - WB-only areas unless the driver explicitly touches them. + which uses a granule-sized UC mapping. This granule will cover some + WB-only memory, but since UC is non-speculative, the processor will + never generate an uncacheable reference to the WB-only areas unless + the driver explicitly touches them. mmap of 0x0-0xFFFFF legacy_mem by "X" - If the EFI memory map reports this entire range as WB, there - is no VGA MMIO hole, and the mmap should fail or be done with - a WB mapping. + If the EFI memory map reports that the entire range supports the + same attributes, we can allow the mmap (and we will prefer WB if + supported, as is the case with HP sx[12]000 machines with VGA + disabled). 
- There's no easy way for X to determine whether the 0xA0000-0xBFFFF - region is a frame buffer or just memory, so I think it's best to - just fail this mmap request rather than using a WB mapping. As - far as I know, there's no need to map legacy_mem with WB - mappings. + If EFI reports the range as partly WB and partly UC (as on sx[12]000 + machines with VGA enabled), we must fail the mmap because there's no + safe attribute to use. - Otherwise, a UC mapping of the entire region is probably safe. - The VGA hole means the region will not be in kern_memmap. The - HP sx1000 chipset doesn't support UC access to the memory surrounding - the VGA hole, but X doesn't need that area anyway and should not - reference it. + If EFI reports some of the range but not all (as on Intel firmware + that doesn't report the VGA frame buffer at all), we should fail the + mmap and force the user to map just the specific region of interest. mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled @@ -202,6 +195,16 @@ PAST PROBLEM CASES This is a special case of the previous case, and the mmap should fail for the same reason as above. + read of /sys/devices/.../rom + + For VGA devices, this may cause an ioremap() of 0xC0000. This + used to be done with a UC mapping, because the VGA frame buffer + at 0xA0000 prevents use of a WB granule. The UC mapping causes + an MCA on HP sx[12]000 chipsets. + + We should use WB page table mappings to avoid covering the VGA + frame buffer. + NOTES [1] SDM rev 2.2, vol 2, sec 4.4.1. -- cgit v1.2.3 From b73c3d778647bffff6dbcbe41e8fc01215a22194 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Mon, 26 Mar 2007 21:59:33 -0800 Subject: [SCSI] Remove some unused SCSI-related kernel config variables. Remove the unused SCSI-related kernel config variables SCSI_NCR53C8XX_PROFILE_SUPPORT SCSI_NCR53C8XX_PROFILE 53C700_IO_MAPPED AIC79XX_ENABLE_RD_STRM AIC7XXX_PROBE_EISA_VL Signed-off-by: Robert P. J. 
Day Signed-off-by: Andrew Morton Signed-off-by: James Bottomley --- Documentation/scsi/ncr53c8xx.txt | 5 ----- 1 file changed, 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt index caf10b15518..88ef88b949f 100644 --- a/Documentation/scsi/ncr53c8xx.txt +++ b/Documentation/scsi/ncr53c8xx.txt @@ -562,11 +562,6 @@ if only one has a flaw for some SCSI feature, you can disable the support by the driver of this feature at linux start-up and enable this feature after boot-up only for devices that support it safely. -CONFIG_SCSI_NCR53C8XX_PROFILE_SUPPORT (default answer: n) - This option must be set for profiling information to be gathered - and printed out through the proc file system. This features may - impact performances. - CONFIG_SCSI_NCR53C8XX_IOMAPPED (default answer: n) Answer "y" if you suspect your mother board to not allow memory mapped I/O. May slow down performance a little. This option is required by -- cgit v1.2.3 From 10c9a017f1bd84a7aedaea7029cd5224863197db Mon Sep 17 00:00:00 2001 From: "Salyzyn, Mark" Date: Tue, 27 Mar 2007 11:51:34 -0400 Subject: [SCSI] aacraid: Add SMC and SUN products to README Add SMC and SUN products to aacraid documentation Signed-off-by: Mark Salyzyn Signed-off-by: James Bottomley --- Documentation/scsi/aacraid.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt index dc8e44fc650..bddc641dddb 100644 --- a/Documentation/scsi/aacraid.txt +++ b/Documentation/scsi/aacraid.txt @@ -37,7 +37,11 @@ Supported Cards/Chipsets 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release) 9005:0286:9005:02ac Adaptec 1800 (Typhoon44) 9005:0285:9005:02b5 Adaptec 5445 (Voodoo44) + 9005:0285:15d9:02b5 SMC AOC-USAS-S4i + 9005:0285:15d9:02c9 SMC AOC-USAS-S4iB 9005:0285:9005:02b6 Adaptec 5805 (Voodoo80) + 9005:0285:15d9:02b6 SMC AOC-USAS-S8i + 9005:0285:15d9:02ca SMC 
AOC-USAS-S8iB 9005:0285:9005:02b7 Adaptec 5085 (Voodoo08) 9005:0285:9005:02bb Adaptec 3405 (Marauder40LP) 9005:0285:9005:02bc Adaptec 3805 (Marauder80LP) @@ -93,6 +97,9 @@ Supported Cards/Chipsets 9005:0286:9005:02ae (Aurora Lite ARK) 9005:0285:9005:02b0 (Sunrise Lake ARK) 9005:0285:9005:02b1 Adaptec (Voodoo 8 internal 8 external) + 9005:0285:108e:7aac SUN STK RAID REM (Voodoo44 Coyote) + 9005:0285:108e:0286 SUN SG-XPCIESAS-R-IN (Cougar) + 9005:0285:108e:0287 SUN SG-XPCIESAS-R-EX (Prometheus) People ------------------------- -- cgit v1.2.3 From 144ff8bf0d84a81806bfb5979b0a6b176b1fcace Mon Sep 17 00:00:00 2001 From: "Salyzyn, Mark" Date: Wed, 4 Apr 2007 15:49:54 -0400 Subject: [SCSI] aacraid: Correct SMC products in aacraid.txt Correct a spelling mistake for the SMC product names (replace 'B' with 'R') in the Documentation/scsi/aacraid.txt file. This is a follow-up to a documentation patch '[PATCH] aacraid: Add SMC and SUN products to README' submitted and accepted to scsi-misc-2.6 on March 27 2007. 
Signed-off-by: Mark Salyzyn Signed-off-by: James Bottomley --- Documentation/scsi/aacraid.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt index bddc641dddb..2368e7e4a8c 100644 --- a/Documentation/scsi/aacraid.txt +++ b/Documentation/scsi/aacraid.txt @@ -38,10 +38,10 @@ Supported Cards/Chipsets 9005:0286:9005:02ac Adaptec 1800 (Typhoon44) 9005:0285:9005:02b5 Adaptec 5445 (Voodoo44) 9005:0285:15d9:02b5 SMC AOC-USAS-S4i - 9005:0285:15d9:02c9 SMC AOC-USAS-S4iB + 9005:0285:15d9:02c9 SMC AOC-USAS-S4iR 9005:0285:9005:02b6 Adaptec 5805 (Voodoo80) 9005:0285:15d9:02b6 SMC AOC-USAS-S8i - 9005:0285:15d9:02ca SMC AOC-USAS-S8iB + 9005:0285:15d9:02ca SMC AOC-USAS-S8iR 9005:0285:9005:02b7 Adaptec 5085 (Voodoo08) 9005:0285:9005:02bb Adaptec 3405 (Marauder40LP) 9005:0285:9005:02bc Adaptec 3805 (Marauder80LP) -- cgit v1.2.3 From 7053acbd78336abf5d4bc3d8a875a03624cfb83f Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Thu, 5 Apr 2007 04:07:20 +0100 Subject: [ARM] 4304/1: removes the unnecessary bit number from CKENnn_XXXX This patch removes the unnecessary bit number from CKENnn_XXXX definitions for PXA, so that CKEN0_PWM0 --> CKEN_PWM0 CKEN1_PWM1 --> CKEN_PWM1 ... CKEN24_CAMERA --> CKEN_CAMERA The reasons for the change of these definitions are: 1. they do not scale - they are currently valid for pxa2xx, but definitely not valid for pxa3xx, e.g., pxa3xx has bit 3 for camera instead of bit 24 2. they are unnecessary - the peripheral name within the definition has already announced its usage, we don't need those bit numbers to know which peripheral we are going to enable/disable clock for 3.
they are inconvenient - think about this: a driver programmer for pxa has to remember which bit in the CKEN register to turn on/off Another change in the patch is to make the definitions equal to its clock bit index, so that #define CKEN_CAMERA (24) instead of #define CKEN_CAMERA (1 << 24) this change, however, will add a run-time bit shift operation in pxa_set_cken(), but the benefit of this change is that it scales when bit index exceeds 32, e.g., pxa3xx has two registers CKENA and CKENB, totally 64 bit for this, suppose CAMERA clock enabling bit is CKENB:10, one can simply define CKEN_CAMERA to be (32 + 10) and so that pxa_set_cken() need minimum change to adapt to that. Signed-off-by: eric miao Signed-off-by: Russell King --- Documentation/spi/pxa2xx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx index f9717fe9bd8..215e3b8e726 100644 --- a/Documentation/spi/pxa2xx +++ b/Documentation/spi/pxa2xx @@ -62,7 +62,7 @@ static struct resource pxa_spi_nssp_resources[] = { static struct pxa2xx_spi_master pxa_nssp_master_info = { .ssp_type = PXA25x_NSSP, /* Type of SSP */ - .clock_enable = CKEN9_NSSP, /* NSSP Peripheral clock */ + .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */ .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ .enable_dma = 1, /* Enables NSSP DMA */ }; -- cgit v1.2.3 From 85796e7d939a39787f10a643477298678fed85db Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 29 Apr 2007 23:42:08 -0400 Subject: Input: update some documentation Input-programming.txt got out of sync with the latest changes in input core; let's refresh it. 
Signed-off-by: Dmitry Torokhov --- Documentation/input/input-programming.txt | 125 +++++++++++++++++------------- 1 file changed, 72 insertions(+), 53 deletions(-) (limited to 'Documentation') diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt index 180e0689676..d9d523099bb 100644 --- a/Documentation/input/input-programming.txt +++ b/Documentation/input/input-programming.txt @@ -1,5 +1,3 @@ -$Id: input-programming.txt,v 1.4 2001/05/04 09:47:14 vojtech Exp $ - Programming input drivers ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -20,28 +18,51 @@ pressed or released a BUTTON_IRQ happens. The driver could look like: #include #include +static struct input_dev *button_dev; + static void button_interrupt(int irq, void *dummy, struct pt_regs *fp) { - input_report_key(&button_dev, BTN_1, inb(BUTTON_PORT) & 1); - input_sync(&button_dev); + input_report_key(button_dev, BTN_1, inb(BUTTON_PORT) & 1); + input_sync(button_dev); } static int __init button_init(void) { + int error; + if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); return -EBUSY; } - - button_dev.evbit[0] = BIT(EV_KEY); - button_dev.keybit[LONG(BTN_0)] = BIT(BTN_0); - - input_register_device(&button_dev); + + button_dev = input_allocate_device(); + if (!button_dev) { + printk(KERN_ERR "button.c: Not enough memory\n"); + error = -ENOMEM; + goto err_free_irq; + } + + button_dev->evbit[0] = BIT(EV_KEY); + button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0); + + error = input_register_device(button_dev); + if (error) { + printk(KERN_ERR "button.c: Failed to register device\n"); + goto err_free_dev; + } + + return 0; + + err_free_dev: + input_free_device(button_dev); + err_free_irq: + free_irq(BUTTON_IRQ, button_interrupt); + return error; } static void __exit button_exit(void) { - input_unregister_device(&button_dev); + input_unregister_device(button_dev); free_irq(BUTTON_IRQ, button_interrupt); } @@ -58,17 
+79,18 @@ In the _init function, which is called either upon module load or when booting the kernel, it grabs the required resources (it should also check for the presence of the device). -Then it sets the input bitfields. This way the device driver tells the other +Then it allocates a new input device structure with input_allocate_device() +and sets up input bitfields. This way the device driver tells the other parts of the input systems what it is - what events can be generated or -accepted by this input device. Our example device can only generate EV_KEY type -events, and from those only BTN_0 event code. Thus we only set these two -bits. We could have used +accepted by this input device. Our example device can only generate EV_KEY +type events, and from those only BTN_0 event code. Thus we only set these +two bits. We could have used set_bit(EV_KEY, button_dev.evbit); set_bit(BTN_0, button_dev.keybit); as well, but with more than single bits the first approach tends to be -shorter. +shorter. Then the example driver registers the input device structure by calling @@ -76,16 +98,15 @@ Then the example driver registers the input device structure by calling This adds the button_dev structure to linked lists of the input driver and calls device handler modules _connect functions to tell them a new input -device has appeared. Because the _connect functions may call kmalloc(, -GFP_KERNEL), which can sleep, input_register_device() must not be called -from an interrupt or with a spinlock held. +device has appeared. input_register_device() may sleep and therefore must +not be called from an interrupt or with a spinlock held.
While in use, the only used function of the driver is button_interrupt() which upon every interrupt from the button checks its state and reports it -via the +via the input_report_key() @@ -113,16 +134,10 @@ can use the open and close callback to know when it can stop polling or release the interrupt and when it must resume polling or grab the interrupt again. To do that, we would add this to our example driver: -int button_used = 0; - static int button_open(struct input_dev *dev) { - if (button_used++) - return 0; - if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); - button_used--; return -EBUSY; } @@ -131,20 +146,21 @@ static int button_open(struct input_dev *dev) static void button_close(struct input_dev *dev) { - if (!--button_used) - free_irq(IRQ_AMIGA_VERTB, button_interrupt); + free_irq(IRQ_AMIGA_VERTB, button_interrupt); } static int __init button_init(void) { ... - button_dev.open = button_open; - button_dev.close = button_close; + button_dev->open = button_open; + button_dev->close = button_close; ... } -Note the button_used variable - we have to track how many times the open -function was called to know when exactly our device stops being used. +Note that input core keeps track of number of users for the device and +makes sure that dev->open() is called only when the first user connects +to the device and that dev->close() is called when the very last user +disconnects. Calls to both callbacks are serialized. The open() callback should return a 0 in case of success or any nonzero value in case of failure. The close() callback (which is void) must always succeed. @@ -175,7 +191,7 @@ set the corresponding bits and call the input_report_rel(struct input_dev *dev, int code, int value) -function. Events are generated only for nonzero value. +function. Events are generated only for nonzero value. However EV_ABS requires a little special care. 
Before calling input_register_device, you have to fill additional fields in the input_dev @@ -187,6 +203,10 @@ the ABS_X axis: button_dev.absfuzz[ABS_X] = 4; button_dev.absflat[ABS_X] = 8; +Or, you can just say: + + input_set_abs_params(button_dev, ABS_X, 0, 255, 4, 8); + This setting would be appropriate for a joystick X axis, with the minimum of 0, maximum of 255 (which the joystick *must* be able to reach, no problem if it sometimes reports more, but it must be able to always reach the min and @@ -197,14 +217,7 @@ If you don't need absfuzz and absflat, you can set them to zero, which mean that the thing is precise and always returns to exactly the center position (if it has any). -1.4 The void *private field -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This field in the input structure can be used to point to any private data -structures in the input device driver, in case the driver handles more than -one device. You'll need it in the open and close callbacks. - -1.5 NBITS(), LONG(), BIT() +1.4 NBITS(), LONG(), BIT() ~~~~~~~~~~~~~~~~~~~~~~~~~~ These three macros from input.h help some bitfield computations: @@ -213,13 +226,9 @@ These three macros from input.h help some bitfield computations: LONG(x) - returns the index in the array in longs for bit x BIT(x) - returns the index in a long for bit x -1.6 The number, id* and name fields +1.5 The id* and name fields ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The dev->number is assigned by the input system to the input device when it -is registered. It has no use except for identifying the device to the user -in system messages. - The dev->name should be set before registering the input device by the input device driver. It's a string like 'Generic button device' containing a user friendly name of the device. @@ -234,15 +243,25 @@ driver. The id and name fields can be passed to userland via the evdev interface. 
-1.7 The keycode, keycodemax, keycodesize fields +1.6 The keycode, keycodemax, keycodesize fields ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -These two fields will be used for any input devices that report their data -as scancodes. If not all scancodes can be known by autodetection, they may -need to be set by userland utilities. The keycode array then is an array -used to map from scancodes to input system keycodes. The keycode max will -contain the size of the array and keycodesize the size of each entry in it -(in bytes). +These three fields should be used by input devices that have dense keymaps. +The keycode is an array used to map from scancodes to input system keycodes. +The keycode max should contain the size of the array and keycodesize the +size of each entry in it (in bytes). + +Userspace can query and alter current scancode to keycode mappings using +EVIOCGKEYCODE and EVIOCSKEYCODE ioctls on corresponding evdev interface. +When a device has all 3 aforementioned fields filled in, the driver may +rely on kernel's default implementation of setting and querying keycode +mappings. + +1.7 dev->getkeycode() and dev->setkeycode() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +getkeycode() and setkeycode() callbacks allow drivers to override default +keycode/keycodesize/keycodemax mapping mechanism provided by input core +and implement sparse keycode maps. 1.8 Key autorepeat ~~~~~~~~~~~~~~~~~~ @@ -266,7 +285,7 @@ direction - from the system to the input device driver. 
If your input device driver can handle these events, it has to set the respective bits in evbit, *and* also the callback routine: - button_dev.event = button_event; + button_dev->event = button_event; int button_event(struct input_dev *dev, unsigned int type, unsigned int code, int value); { -- cgit v1.2.3 From fccb56e4d82132ac15359efc9e419371e4533437 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 May 2007 23:26:27 +0200 Subject: i2c: Kill i2c_adapter.class_dev Kill i2c_adapter.class_dev. Instead, set the class of i2c_adapter.dev to i2c_adapter_class, so that a symlink will be created for every i2c_adapter in /sys/class/i2c-adapter. The same change must be mirrored to i2c-isa as it duplicates some of the i2c-core functionalities. User-space tools and libraries might need some adjustments. In particular, libsensors from lm_sensors 2.10.3 or later is required for proper discovery of i2c adapter names after this change. Signed-off-by: Jean Delvare --- Documentation/feature-removal-schedule.txt | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5c88ba1ea26..c4b3bdad15d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -190,18 +190,10 @@ Who: Jean Delvare --------------------------- -What: i2c_adapter.dev - i2c_adapter.list +What: i2c_adapter.list When: July 2007 -Why: Superfluous, given i2c_adapter.class_dev: - * The "dev" was a stand-in for the physical device node that legacy - drivers would not have; but now it's almost always present. Any - remaining legacy drivers must upgrade (they now trigger warnings). - * The "list" duplicates class device children. - The delay in removing this is so upgraded lm_sensors and libsensors - can get deployed. 
(Removal causes minor changes in the sysfs layout, - notably the location of the adapter type name and parenting the i2c - client hardware directly from their controller.) +Why: Superfluous, this list duplicates the one maintained by the driver + core. Who: Jean Delvare , David Brownell -- cgit v1.2.3 From f75803de6ae9aaebaf096d4590b40503c896eca7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 May 2007 23:26:29 +0200 Subject: i2c-nforce2: Add support for the MCP61 and MCP65 Signed-off-by: Jean Delvare Cc: Hans-Frieder Vogt --- Documentation/i2c/busses/i2c-nforce2 | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2 index 7f61fbc03f7..fae3495bcba 100644 --- a/Documentation/i2c/busses/i2c-nforce2 +++ b/Documentation/i2c/busses/i2c-nforce2 @@ -9,6 +9,8 @@ Supported adapters: * nForce4 MCP-04 10de:0034 * nForce4 MCP51 10de:0264 * nForce4 MCP55 10de:0368 + * nForce4 MCP61 10de:03EB + * nForce4 MCP65 10de:0446 Datasheet: not publicly available, but seems to be similar to the AMD-8111 SMBus 2.0 adapter. -- cgit v1.2.3 From 4298cfc3eb6110df989f784be516c6340c597a66 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 1 May 2007 23:26:31 +0200 Subject: i2c: i2c probe() and remove() documented Update Documentation/i2c to match previous patches updating probe() and remove() logic. Signed-off-by: David Brownell Signed-off-by: Jean Delvare --- Documentation/i2c/summary | 29 +++++++++----- Documentation/i2c/writing-clients | 82 ++++++++++++++++++++++++++++++++++----- 2 files changed, 92 insertions(+), 19 deletions(-) (limited to 'Documentation') diff --git a/Documentation/i2c/summary b/Documentation/i2c/summary index 41dde877679..aea60bf7e8f 100644 --- a/Documentation/i2c/summary +++ b/Documentation/i2c/summary @@ -4,17 +4,23 @@ I2C and SMBus ============= I2C (pronounce: I squared C) is a protocol developed by Philips. 
It is a -slow two-wire protocol (10-400 kHz), but it suffices for many types of -devices. +slow two-wire protocol (variable speed, up to 400 kHz), with a high speed +extension (3.4 MHz). It provides an inexpensive bus for connecting many +types of devices with infrequent or low bandwidth communications needs. +I2C is widely used with embedded systems. Some systems use variants that +don't meet branding requirements, and so are not advertised as being I2C. -SMBus (System Management Bus) is a subset of the I2C protocol. Many -modern mainboards have a System Management Bus. There are a lot of -devices which can be connected to a SMBus; the most notable are modern -memory chips with EEPROM memories and chips for hardware monitoring. +SMBus (System Management Bus) is based on the I2C protocol, and is mostly +a subset of I2C protocols and signaling. Many I2C devices will work on an +SMBus, but some SMBus protocols add semantics beyond what is required to +achieve I2C branding. Modern PC mainboards rely on SMBus. The most common +devices connected through SMBus are RAM modules configured using I2C EEPROMs, +and hardware monitoring chips. -Because the SMBus is just a special case of the generalized I2C bus, we -can simulate the SMBus protocol on plain I2C busses. The reverse is -regretfully impossible. +Because the SMBus is mostly a subset of the generalized I2C bus, we can +use its protocols on many I2C systems. However, there are systems that don't +meet both SMBus and I2C electrical constraints; and others which can't +implement all the common SMBus protocol semantics or messages. Terminology @@ -29,6 +35,7 @@ When we talk about I2C, we use the following terms: An Algorithm driver contains general code that can be used for a whole class of I2C adapters. Each specific adapter driver depends on one algorithm driver. + A Driver driver (yes, this sounds ridiculous, sorry) contains the general code to access some type of device. 
Each detected device gets its own data in the Client structure. Usually, Driver and Client are more closely @@ -40,6 +47,10 @@ a separate Adapter and Algorithm driver), and drivers for your I2C devices in this package. See the lm_sensors project http://www.lm-sensors.nu for device drivers. +At this time, Linux only operates I2C (or SMBus) in master mode; you can't +use these APIs to make a Linux system behave as a slave/device, either to +speak a custom protocol or to emulate some other device. + Included Bus Drivers ==================== diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index fbcff96f4ca..54255fd68ec 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -1,5 +1,5 @@ This is a small guide for those who want to write kernel drivers for I2C -or SMBus devices. +or SMBus devices, using Linux as the protocol host/master (not slave). To set up a driver, you need to do several things. Some are optional, and some things can be done slightly or completely different. Use this as a @@ -29,8 +29,16 @@ static struct i2c_driver foo_driver = { .driver = { .name = "foo", }, + + /* iff driver uses driver model ("new style") binding model: */ + .probe = foo_probe, + .remove = foo_remove, + + /* else, driver uses "legacy" binding model: */ .attach_adapter = foo_attach_adapter, .detach_client = foo_detach_client, + + /* these may be used regardless of the driver binding model */ .shutdown = foo_shutdown, /* optional */ .suspend = foo_suspend, /* optional */ .resume = foo_resume, /* optional */ @@ -40,7 +48,8 @@ static struct i2c_driver foo_driver = { The name field is the driver name, and must not contain spaces. It should match the module name (if the driver can be compiled as a module), although you can use MODULE_ALIAS (passing "foo" in this example) to add -another name for the module. +another name for the module. 
If the driver name doesn't match the module +name, the module won't be automatically loaded (hotplug/coldplug). All other fields are for call-back functions which will be explained below. @@ -141,6 +150,59 @@ Writing is done the same way. Probing and attaching ===================== +The Linux I2C stack was originally written to support access to hardware +monitoring chips on PC motherboards, and thus it embeds some assumptions +that are more appropriate to SMBus (and PCs) than to I2C. One of these +assumptions is that most adapters and devices drivers support the SMBUS_QUICK +protocol to probe device presence. Another is that devices and their drivers +can be sufficiently configured using only such probe primitives. + +As Linux and its I2C stack became more widely used in embedded systems +and complex components such as DVB adapters, those assumptions became more +problematic. Drivers for I2C devices that issue interrupts need more (and +different) configuration information, as do drivers handling chip variants +that can't be distinguished by protocol probing, or which need some board +specific information to operate correctly. + +Accordingly, the I2C stack now has two models for associating I2C devices +with their drivers: the original "legacy" model, and a newer one that's +fully compatible with the Linux 2.6 driver model. These models do not mix, +since the "legacy" model requires drivers to create "i2c_client" device +objects after SMBus style probing, while the Linux driver model expects +drivers to be given such device objects in their probe() routines. + + +Standard Driver Model Binding ("New Style") +------------------------------------------- + +System infrastructure, typically board-specific initialization code or +boot firmware, reports what I2C devices exist. 
For example, there may be +a table, in the kernel or from the boot loader, identifying I2C devices +and linking them to board-specific configuration information about IRQs +and other wiring artifacts, chip type, and so on. That could be used to +create i2c_client objects for each I2C device. + +I2C device drivers using this binding model work just like any other +kind of driver in Linux: they provide a probe() method to bind to +those devices, and a remove() method to unbind. + + static int foo_probe(struct i2c_client *client); + static int foo_remove(struct i2c_client *client); + +Remember that the i2c_driver does not create those client handles. The +handle may be used during foo_probe(). If foo_probe() reports success +(zero not a negative status code) it may save the handle and use it until +foo_remove() returns. That binding model is used by most Linux drivers. + +Drivers match devices when i2c_client.driver_name and the driver name are +the same; this approach is used in several other busses that don't have +device typing support in the hardware. The driver and module name should +match, so hotplug/coldplug mechanisms will modprobe the driver. + + +Legacy Driver Binding Model +--------------------------- + Most i2c devices can be present on several i2c addresses; for some this is determined in hardware (by soldering some chip pins to Vcc or Ground), for others this can be changed in software (by writing to specific client @@ -162,8 +224,8 @@ NOTE: If you want to write a `sensors' driver, the interface is slightly -Probing classes ---------------- +Probing classes (Legacy model) +------------------------------ All parameters are given as lists of unsigned 16-bit integers. Lists are terminated by I2C_CLIENT_END. @@ -210,8 +272,8 @@ Note that you *have* to call the defined variable `normal_i2c', without any prefix! 
-Attaching to an adapter ------------------------ +Attaching to an adapter (Legacy model) +-------------------------------------- Whenever a new adapter is inserted, or for all adapters if the driver is being registered, the callback attach_adapter() is called. Now is the @@ -237,8 +299,8 @@ them (unless a `force' parameter was used). In addition, addresses that are already in use (by some other registered client) are skipped. -The detect client function --------------------------- +The detect client function (Legacy model) +----------------------------------------- The detect client function is called by i2c_probe. The `kind' parameter contains -1 for a probed detection, 0 for a forced detection, or a positive @@ -427,8 +489,8 @@ For now, you can ignore the `flags' parameter. It is there for future use. } -Removing the client -=================== +Removing the client (Legacy model) +================================== The detach_client call back function is called when a client should be removed. It may actually fail, but only when panicking. This code is -- cgit v1.2.3 From ce9e0794c23fb1d0222cb10009a198b427dcf6ad Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 May 2007 23:26:32 +0200 Subject: i2c: Document i2c_new_device() Document the new i2c_new_device(), i2c_new_probed_device() and i2c_unregister_device() functions. Signed-off-by: Jean Delvare --- Documentation/i2c/writing-clients | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'Documentation') diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index 54255fd68ec..e62fbfa1282 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -200,6 +200,44 @@ device typing support in the hardware. The driver and module name should match, so hotplug/coldplug mechanisms will modprobe the driver. 
+Device Creation (Standard driver model) +--------------------------------------- + +If you know for a fact that an I2C device is connected to a given I2C bus, +you can instantiate that device by simply filling an i2c_board_info +structure with the device address and driver name, and calling +i2c_new_device(). This will create the device, then the driver core will +take care of finding the right driver and will call its probe() method. +If a driver supports different device types, you can specify the type you +want using the type field. You can also specify an IRQ and platform data +if needed. + +Sometimes you know that a device is connected to a given I2C bus, but you +don't know the exact address it uses. This happens on TV adapters for +example, where the same driver supports dozens of slightly different +models, and I2C device addresses change from one model to the next. In +that case, you can use the i2c_new_probed_device() variant, which is +similar to i2c_new_device(), except that it takes an additional list of +possible I2C addresses to probe. A device is created for the first +responsive address in the list. If you expect more than one device to be +present in the address range, simply call i2c_new_probed_device() that +many times. + +The call to i2c_new_device() or i2c_new_probed_device() typically happens +in the I2C bus driver. You may want to save the returned i2c_client +reference for later use. + + +Device Deletion (Standard driver model) +--------------------------------------- + +Each I2C device which has been created using i2c_new_device() or +i2c_new_probed_device() can be unregistered by calling +i2c_unregister_device(). If you don't call it explicitly, it will be +called automatically before the underlying I2C bus itself is removed, as a +device can't survive its parent in the device driver model. 
+ + Legacy Driver Binding Model --------------------------- -- cgit v1.2.3 From b3e820968ad47219f7d559117a30e85cf96b4e4e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 May 2007 23:26:32 +0200 Subject: i2c: Make i2c_del_driver a void function Make i2c_del_driver a void function, like all other driver removal functions. It always returned 0 even when errors occurred, and nobody ever actually checked the return value anyway. And we cannot fail a module removal anyway. Signed-off-by: Jean Delvare --- Documentation/i2c/writing-clients | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index e62fbfa1282..f7e04ec849b 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -586,10 +586,7 @@ the driver module is usually enough. void foo_cleanup(void) { if (foo_initialized == 1) { - if ((res = i2c_del_driver(&foo_driver))) { - printk("foo: Driver registration failed, module not removed.\n"); - return; - } + i2c_del_driver(&foo_driver); foo_initialized --; } } -- cgit v1.2.3 From 11de70bd4d40a1a39c1133b260bfbd6306e981d3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 May 2007 23:26:34 +0200 Subject: i2c: Obsolete i2c-ixp2000, i2c-ixp4xx and scx200_i2c The new generic i2c-gpio driver should be used instead. The obsolete drivers will be removed in September 2007. Signed-off-by: Jean Delvare Cc: Deepak Saxena Cc: Jordan Crouse --- Documentation/feature-removal-schedule.txt | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index c4b3bdad15d..547663bdae8 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -306,3 +306,11 @@ Why: Code was merged, then submitter immediately disappeared leaving Who: David S.
Miller --------------------------- + +What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers +When: September 2007 +Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific + I2C-over-GPIO drivers. +Who: Jean Delvare + +--------------------------- -- cgit v1.2.3 From eefcd75e72f382270f8f64e030550b10e3882b2b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 May 2007 23:26:35 +0200 Subject: i2c: Documentation update Make the documentation on how to write and port i2c drivers more in line with the current state of things: * i2c-isa is deprecated and soon gone, so stop advertising it. * Drop many sensors-specific references. Most of them were outdated anyway. * Update the example code to reflect the recent and not-so-recent API and coding style preference changes. * Simplify the example init and cleanup functions. This should make things less complex to understand for newcomers. Signed-off-by: Jean Delvare --- Documentation/i2c/porting-clients | 18 ++- Documentation/i2c/writing-clients | 292 ++++---------------------------------- 2 files changed, 39 insertions(+), 271 deletions(-) (limited to 'Documentation') diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients index ca272b263a9..7bf82c08f6c 100644 --- a/Documentation/i2c/porting-clients +++ b/Documentation/i2c/porting-clients @@ -1,4 +1,4 @@ -Revision 6, 2005-11-20 +Revision 7, 2007-04-19 Jean Delvare Greg KH @@ -20,6 +20,10 @@ yours for best results. Technical changes: +* [Driver type] Any driver that was relying on i2c-isa has to be + converted to a proper isa, platform or pci driver. This is not + covered by this guide. + * [Includes] Get rid of "version.h" and . 
Includes typically look like that: #include @@ -27,12 +31,10 @@ Technical changes: #include #include #include - #include /* for ISA drivers */ #include /* for hardware monitoring drivers */ #include #include /* if you need VRM support */ #include /* for class registration */ - #include /* if you have I/O operations */ Please respect this inclusion order. Some extra headers may be required for a given driver (e.g. "lm75.h"). @@ -69,20 +71,16 @@ Technical changes: sensors mailing list by providing a patch to the Documentation/hwmon/sysfs-interface file. -* [Attach] For I2C drivers, the attach function should make sure - that the adapter's class has I2C_CLASS_HWMON (or whatever class is - suitable for your driver), using the following construct: +* [Attach] The attach function should make sure that the adapter's + class has I2C_CLASS_HWMON (or whatever class is suitable for your + driver), using the following construct: if (!(adapter->class & I2C_CLASS_HWMON)) return 0; - ISA-only drivers of course don't need this. Call i2c_probe() instead of i2c_detect(). * [Detect] As mentioned earlier, the flags parameter is gone. The type_name and client_name strings are replaced by a single name string, which will be filled with a lowercase, short string. - In i2c-only drivers, drop the i2c_is_isa_adapter check, it's - useless. Same for isa-only drivers, as the test would always be - true. Only hybrid drivers (which are quite rare) still need it. The labels used for error paths are reduced to the number needed. It is advised that the labels are given descriptive names such as exit and exit_free. Don't forget to properly set err before diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index f7e04ec849b..3d8d36b0ad1 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -74,16 +74,13 @@ An example structure is below. 
struct foo_data { struct i2c_client client; - struct semaphore lock; /* For ISA access in `sensors' drivers. */ - int sysctl_id; /* To keep the /proc directory entry for - `sensors' drivers. */ enum chips type; /* To keep the chips type for `sensors' drivers. */ /* Because the i2c bus is slow, it is often useful to cache the read information of a chip for some time (for example, 1 or 2 seconds). It depends of course on the device whether this is really worthwhile or even sensible. */ - struct semaphore update_lock; /* When we are reading lots of information, + struct mutex update_lock; /* When we are reading lots of information, another process should not update the below information */ char valid; /* != 0 if the following fields are valid. */ @@ -104,8 +101,7 @@ some obscure clients). But we need generic reading and writing routines. I have found it useful to define foo_read and foo_write function for this. For some cases, it will be easier to call the i2c functions directly, but many chips have some kind of register-value idea that can easily -be encapsulated. Also, some chips have both ISA and I2C interfaces, and -it useful to abstract from this (only for `sensors' drivers). +be encapsulated. The below functions are simple examples, and should not be copied literally. @@ -128,24 +124,6 @@ literally. return i2c_smbus_write_word_data(client,reg,value); } -For sensors code, you may have to cope with ISA registers too. Something -like the below often works. Note the locking! - - int foo_read_value(struct i2c_client *client, u8 reg) - { - int res; - if (i2c_is_isa_client(client)) { - down(&(((struct foo_data *) (client->data)) -> lock)); - outb_p(reg,client->addr + FOO_ADDR_REG_OFFSET); - res = inb_p(client->addr + FOO_DATA_REG_OFFSET); - up(&(((struct foo_data *) (client->data)) -> lock)); - return res; - } else - return i2c_smbus_read_byte_data(client,reg); - } - -Writing is done the same way. 
- Probing and attaching ===================== @@ -257,10 +235,6 @@ detection algorithm. You do not have to use this parameter interface; but don't try to use function i2c_probe() if you don't. -NOTE: If you want to write a `sensors' driver, the interface is slightly - different! See below. - - Probing classes (Legacy model) ------------------------------ @@ -344,10 +318,6 @@ The detect client function is called by i2c_probe. The `kind' parameter contains -1 for a probed detection, 0 for a forced detection, or a positive number for a forced detection with a chip type forced. -Below, some things are only needed if this is a `sensors' driver. Those -parts are between /* SENSORS ONLY START */ and /* SENSORS ONLY END */ -markers. - Returning an error different from -ENODEV in a detect function will cause the detection to stop: other addresses and adapters won't be scanned. This should only be done on fatal or internal errors, such as a memory @@ -356,64 +326,20 @@ shortage or i2c_attach_client failing. For now, you can ignore the `flags' parameter. It is there for future use. int foo_detect_client(struct i2c_adapter *adapter, int address, - unsigned short flags, int kind) + int kind) { int err = 0; int i; - struct i2c_client *new_client; + struct i2c_client *client; struct foo_data *data; - const char *client_name = ""; /* For non-`sensors' drivers, put the real - name here! */ + const char *name = ""; /* Let's see whether this adapter can support what we need. - Please substitute the things you need here! - For `sensors' drivers, add `! is_isa &&' to the if statement */ + Please substitute the things you need here! */ if (!i2c_check_functionality(adapter,I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_WRITE_BYTE)) goto ERROR0; - /* SENSORS ONLY START */ - const char *type_name = ""; - int is_isa = i2c_is_isa_adapter(adapter); - - /* Do this only if the chip can additionally be found on the ISA bus - (hybrid chip). 
*/ - - if (is_isa) { - - /* Discard immediately if this ISA range is already used */ - /* FIXME: never use check_region(), only request_region() */ - if (check_region(address,FOO_EXTENT)) - goto ERROR0; - - /* Probe whether there is anything on this address. - Some example code is below, but you will have to adapt this - for your own driver */ - - if (kind < 0) /* Only if no force parameter was used */ { - /* We may need long timeouts at least for some chips. */ - #define REALLY_SLOW_IO - i = inb_p(address + 1); - if (inb_p(address + 2) != i) - goto ERROR0; - if (inb_p(address + 3) != i) - goto ERROR0; - if (inb_p(address + 7) != i) - goto ERROR0; - #undef REALLY_SLOW_IO - - /* Let's just hope nothing breaks here */ - i = inb_p(address + 5) & 0x7f; - outb_p(~i & 0x7f,address+5); - if ((inb_p(address + 5) & 0x7f) != (~i & 0x7f)) { - outb_p(i,address+5); - return 0; - } - } - } - - /* SENSORS ONLY END */ - /* OK. For now, we presume we have a valid client. We now create the client structure, even though we cannot fill it completely yet. But it allows us to access several i2c functions safely */ @@ -423,13 +349,12 @@ For now, you can ignore the `flags' parameter. It is there for future use. goto ERROR0; } - new_client = &data->client; - i2c_set_clientdata(new_client, data); + client = &data->client; + i2c_set_clientdata(client, data); - new_client->addr = address; - new_client->adapter = adapter; - new_client->driver = &foo_driver; - new_client->flags = 0; + client->addr = address; + client->adapter = adapter; + client->driver = &foo_driver; /* Now, we do the remaining detection. If no `force' parameter is used. */ @@ -437,19 +362,17 @@ For now, you can ignore the `flags' parameter. It is there for future use. parameter was used. 
*/ if (kind < 0) { /* The below is of course bogus */ - if (foo_read(new_client,FOO_REG_GENERIC) != FOO_GENERIC_VALUE) + if (foo_read(client, FOO_REG_GENERIC) != FOO_GENERIC_VALUE) goto ERROR1; } - /* SENSORS ONLY START */ - /* Next, specific detection. This is especially important for `sensors' devices. */ /* Determine the chip type. Not needed if a `force_CHIPTYPE' parameter was used. */ if (kind <= 0) { - i = foo_read(new_client,FOO_REG_CHIPTYPE); + i = foo_read(client, FOO_REG_CHIPTYPE); if (i == FOO_TYPE_1) kind = chip1; /* As defined in the enum */ else if (i == FOO_TYPE_2) @@ -463,63 +386,31 @@ For now, you can ignore the `flags' parameter. It is there for future use. /* Now set the type and chip names */ if (kind == chip1) { - type_name = "chip1"; /* For /proc entry */ - client_name = "CHIP 1"; + name = "chip1"; } else if (kind == chip2) { - type_name = "chip2"; /* For /proc entry */ - client_name = "CHIP 2"; + name = "chip2"; } - /* Reserve the ISA region */ - if (is_isa) - request_region(address,FOO_EXTENT,type_name); - - /* SENSORS ONLY END */ - /* Fill in the remaining client fields. */ - strcpy(new_client->name,client_name); - - /* SENSORS ONLY BEGIN */ + strlcpy(client->name, name, I2C_NAME_SIZE); data->type = kind; - /* SENSORS ONLY END */ - - data->valid = 0; /* Only if you use this field */ - init_MUTEX(&data->update_lock); /* Only if you use this field */ + mutex_init(&data->update_lock); /* Only if you use this field */ /* Any other initializations in data must be done here too. */ - /* Tell the i2c layer a new client has arrived */ - if ((err = i2c_attach_client(new_client))) - goto ERROR3; - - /* SENSORS ONLY BEGIN */ - /* Register a new directory entry with module sensors. See below for - the `template' structure. 
*/ - if ((i = i2c_register_entry(new_client, type_name, - foo_dir_table_template,THIS_MODULE)) < 0) { - err = i; - goto ERROR4; - } - data->sysctl_id = i; - - /* SENSORS ONLY END */ - /* This function can write default values to the client registers, if needed. */ - foo_init_client(new_client); + foo_init_client(client); + + /* Tell the i2c layer a new client has arrived */ + if ((err = i2c_attach_client(client))) + goto ERROR1; + return 0; /* OK, this is not exactly good programming practice, usually. But it is very code-efficient in this case. */ - ERROR4: - i2c_detach_client(new_client); - ERROR3: - ERROR2: - /* SENSORS ONLY START */ - if (is_isa) - release_region(address,FOO_EXTENT); - /* SENSORS ONLY END */ ERROR1: kfree(data); ERROR0: @@ -536,22 +427,12 @@ much simpler than the attachment code, fortunately! int foo_detach_client(struct i2c_client *client) { - int err,i; - - /* SENSORS ONLY START */ - /* Deregister with the `i2c-proc' module. */ - i2c_deregister_entry(((struct lm78_data *)(client->data))->sysctl_id); - /* SENSORS ONLY END */ + int err; /* Try to detach the client from i2c space */ if ((err = i2c_detach_client(client))) return err; - /* HYBRID SENSORS CHIP ONLY START */ - if i2c_is_isa_client(client) - release_region(client->addr,LM78_EXTENT); - /* HYBRID SENSORS CHIP ONLY END */ - kfree(i2c_get_clientdata(client)); return 0; } @@ -564,42 +445,34 @@ When the kernel is booted, or when your foo driver module is inserted, you have to do some initializing. Fortunately, just attaching (registering) the driver module is usually enough. - /* Keep track of how far we got in the initialization process. If several - things have to initialized, and we fail halfway, only those things - have to be cleaned up! 
*/ - static int __initdata foo_initialized = 0; - static int __init foo_init(void) { int res; - printk("foo version %s (%s)\n",FOO_VERSION,FOO_DATE); if ((res = i2c_add_driver(&foo_driver))) { printk("foo: Driver registration failed, module not inserted.\n"); - foo_cleanup(); return res; } - foo_initialized ++; return 0; } - void foo_cleanup(void) + static void __exit foo_cleanup(void) { - if (foo_initialized == 1) { - i2c_del_driver(&foo_driver); - foo_initialized --; - } + i2c_del_driver(&foo_driver); } /* Substitute your own name and email address */ MODULE_AUTHOR("Frodo Looijaard " MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices"); + /* a few non-GPL license types are also allowed */ + MODULE_LICENSE("GPL"); + module_init(foo_init); module_exit(foo_cleanup); Note that some functions are marked by `__init', and some data structures -by `__init_data'. Hose functions and structures can be removed after +by `__initdata'. These functions and structures can be removed after kernel booting (or module loading) is completed. @@ -729,110 +602,7 @@ General purpose routines Below all general purpose routines are listed, that were not mentioned before. - /* This call returns a unique low identifier for each registered adapter, - * or -1 if the adapter was not registered. + /* This call returns a unique low identifier for each registered adapter. */ extern int i2c_adapter_id(struct i2c_adapter *adap); - -The sensors sysctl/proc interface -================================= - -This section only applies if you write `sensors' drivers. - -Each sensors driver creates a directory in /proc/sys/dev/sensors for each -registered client. The directory is called something like foo-i2c-4-65. -The sensors module helps you to do this as easily as possible. - -The template ------------- - -You will need to define a ctl_table template. This template will automatically -be copied to a newly allocated structure and filled in where necessary when -you call sensors_register_entry. 
- -First, I will give an example definition. - static ctl_table foo_dir_table_template[] = { - { FOO_SYSCTL_FUNC1, "func1", NULL, 0, 0644, NULL, &i2c_proc_real, - &i2c_sysctl_real,NULL,&foo_func }, - { FOO_SYSCTL_FUNC2, "func2", NULL, 0, 0644, NULL, &i2c_proc_real, - &i2c_sysctl_real,NULL,&foo_func }, - { FOO_SYSCTL_DATA, "data", NULL, 0, 0644, NULL, &i2c_proc_real, - &i2c_sysctl_real,NULL,&foo_data }, - { 0 } - }; - -In the above example, three entries are defined. They can either be -accessed through the /proc interface, in the /proc/sys/dev/sensors/* -directories, as files named func1, func2 and data, or alternatively -through the sysctl interface, in the appropriate table, with identifiers -FOO_SYSCTL_FUNC1, FOO_SYSCTL_FUNC2 and FOO_SYSCTL_DATA. - -The third, sixth and ninth parameters should always be NULL, and the -fourth should always be 0. The fifth is the mode of the /proc file; -0644 is safe, as the file will be owned by root:root. - -The seventh and eighth parameters should be &i2c_proc_real and -&i2c_sysctl_real if you want to export lists of reals (scaled -integers). You can also use your own function for them, as usual. -Finally, the last parameter is the call-back to gather the data -(see below) if you use the *_proc_real functions. - - -Gathering the data ------------------- - -The call back functions (foo_func and foo_data in the above example) -can be called in several ways; the operation parameter determines -what should be done: - - * If operation == SENSORS_PROC_REAL_INFO, you must return the - magnitude (scaling) in nrels_mag; - * If operation == SENSORS_PROC_REAL_READ, you must read information - from the chip and return it in results. The number of integers - to display should be put in nrels_mag; - * If operation == SENSORS_PROC_REAL_WRITE, you must write the - supplied information to the chip. nrels_mag will contain the number - of integers, results the integers themselves. 
- -The *_proc_real functions will display the elements as reals for the -/proc interface. If you set the magnitude to 2, and supply 345 for -SENSORS_PROC_REAL_READ, it would display 3.45; and if the user would -write 45.6 to the /proc file, it would be returned as 4560 for -SENSORS_PROC_REAL_WRITE. A magnitude may even be negative! - -An example function: - - /* FOO_FROM_REG and FOO_TO_REG translate between scaled values and - register values. Note the use of the read cache. */ - void foo_in(struct i2c_client *client, int operation, int ctl_name, - int *nrels_mag, long *results) - { - struct foo_data *data = client->data; - int nr = ctl_name - FOO_SYSCTL_FUNC1; /* reduce to 0 upwards */ - - if (operation == SENSORS_PROC_REAL_INFO) - *nrels_mag = 2; - else if (operation == SENSORS_PROC_REAL_READ) { - /* Update the readings cache (if necessary) */ - foo_update_client(client); - /* Get the readings from the cache */ - results[0] = FOO_FROM_REG(data->foo_func_base[nr]); - results[1] = FOO_FROM_REG(data->foo_func_more[nr]); - results[2] = FOO_FROM_REG(data->foo_func_readonly[nr]); - *nrels_mag = 2; - } else if (operation == SENSORS_PROC_REAL_WRITE) { - if (*nrels_mag >= 1) { - /* Update the cache */ - data->foo_base[nr] = FOO_TO_REG(results[0]); - /* Update the chip */ - foo_write_value(client,FOO_REG_FUNC_BASE(nr),data->foo_base[nr]); - } - if (*nrels_mag >= 2) { - /* Update the cache */ - data->foo_more[nr] = FOO_TO_REG(results[1]); - /* Update the chip */ - foo_write_value(client,FOO_REG_FUNC_MORE(nr),data->foo_more[nr]); - } - } - } -- cgit v1.2.3 From 8b8ca80e192b10eecc01fc44a2902510af86f73b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] x86-64: configurable fake numa node sizes Extends the numa=fake x86_64 command-line option to allow for configurable node sizes. These nodes can be used in conjunction with cpusets for coarse memory resource management. 
The old command-line option is still supported: numa=fake=32 gives 32 fake NUMA nodes, ignoring the NUMA setup of the actual machine. But now you may configure your system for the node sizes of your choice: numa=fake=2*512,1024,2*256 gives two 512M nodes, one 1024M node, two 256M nodes, and the rest of system memory to a sixth node. The existing hash function is maintained to support the various node sizes that are possible with this implementation. Each node of the same size receives roughly the same amount of available pages, regardless of any reserved memory with its address range. The total available pages on the system is calculated and divided by the number of equal nodes to allocate. These nodes are then dynamically allocated and their borders extended until such time as their number of available pages reaches the required size. Configurable node sizes are recommended when used in conjunction with cpusets for memory control because it eliminates the overhead associated with scanning the zonelists of many smaller full nodes on page_alloc(). Cc: Andi Kleen Signed-off-by: David Rientjes Signed-off-by: Andi Kleen Cc: Paul Jackson Cc: Christoph Lameter Signed-off-by: Andrew Morton --- Documentation/x86_64/boot-options.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 85f51e5a749..7500aad95f3 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -149,7 +149,13 @@ NUMA numa=noacpi Don't parse the SRAT table for NUMA setup - numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine. + numa=fake=CMDLINE + If a number, fakes CMDLINE nodes and ignores NUMA setup of the + actual machine. Otherwise, system memory is configured + depending on the sizes and coefficients listed. For example: + numa=fake=2*512,1024,4*256 + gives two 512M nodes, a 1024M node, and four 256M nodes. 
The + remaining system RAM is allocated to an additional node. numa=hotadd=percent Only allow hotadd memory to preallocate page structures upto -- cgit v1.2.3 From 14694d736bb66d0ec250d05c81c6e98a19c229c6 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] x86-64: split remaining fake nodes equally Extends the numa=fake x86_64 command-line option to split the remaining system memory into equal-sized nodes. For example: numa=fake=2*512,4* gives two 512M nodes and the remaining system memory is split into four approximately equal chunks. This is beneficial for systems where the exact size of RAM is unknown or not necessarily relevant, but the granularity with which nodes shall be allocated is known. Cc: Andi Kleen Signed-off-by: David Rientjes Signed-off-by: Andi Kleen Cc: Paul Jackson Cc: Christoph Lameter Signed-off-by: Andrew Morton --- Documentation/x86_64/boot-options.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 7500aad95f3..12a9aacecaa 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -155,7 +155,9 @@ NUMA depending on the sizes and coefficients listed. For example: numa=fake=2*512,1024,4*256 gives two 512M nodes, a 1024M node, and four 256M nodes. The - remaining system RAM is allocated to an additional node. + remaining system RAM is allocated to an additional node. If + the last character of CMDLINE is a *, the remaining system RAM + is instead divided up equally among its coefficient. 
numa=hotadd=percent Only allow hotadd memory to preallocate page structures upto -- cgit v1.2.3 From 382591d500bbcd20a44416c5e0e292708468587c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] x86-64: fixed size remaining fake nodes Extends the numa=fake x86_64 command-line option to split the remaining system memory into nodes of fixed size. Any leftover memory is allocated to a final node unless the command-line ends with a comma. For example: numa=fake=2*512,*128 gives two 512M nodes and the remaining system memory is split into nodes of 128M each. This is beneficial for systems where the exact size of RAM is unknown or not necessarily relevant, but the size of the remaining nodes to be allocated is known based on their capacity for resource management. Cc: Andi Kleen Signed-off-by: David Rientjes Signed-off-by: Andi Kleen Cc: Paul Jackson Cc: Christoph Lameter Signed-off-by: Andrew Morton --- Documentation/x86_64/boot-options.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 12a9aacecaa..6177d881983 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -153,11 +153,15 @@ NUMA If a number, fakes CMDLINE nodes and ignores NUMA setup of the actual machine. Otherwise, system memory is configured depending on the sizes and coefficients listed. For example: - numa=fake=2*512,1024,4*256 - gives two 512M nodes, a 1024M node, and four 256M nodes. The - remaining system RAM is allocated to an additional node. If - the last character of CMDLINE is a *, the remaining system RAM - is instead divided up equally among its coefficient. + numa=fake=2*512,1024,4*256,*128 + gives two 512M nodes, a 1024M node, four 256M nodes, and the + rest split into 128M chunks. 
If the last character of CMDLINE + is a *, the remaining memory is divided up equally among its + coefficient: + numa=fake=2*512,2* + gives two 512M nodes and the rest split into two nodes. + Otherwise, the remaining system RAM is allocated to an + additional node. numa=hotadd=percent Only allow hotadd memory to preallocate page structures upto -- cgit v1.2.3 From 20280195f2a3d80c42a190959ca22108c93cd7e0 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 2 May 2007 19:27:09 +0200 Subject: [PATCH] x86-64: fake numa for cpusets document Create a document to explain how to use numa=fake in conjunction with cpusets for coarse memory resource management. An attempt to get more awareness and testing for this feature. Cc: Andi Kleen Signed-off-by: David Rientjes Signed-off-by: Andi Kleen Cc: Paul Jackson Cc: Christoph Lameter Signed-off-by: Andrew Morton --- Documentation/x86_64/fake-numa-for-cpusets | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 Documentation/x86_64/fake-numa-for-cpusets (limited to 'Documentation') diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86_64/fake-numa-for-cpusets new file mode 100644 index 00000000000..d1a985c5b00 --- /dev/null +++ b/Documentation/x86_64/fake-numa-for-cpusets @@ -0,0 +1,66 @@ +Using numa=fake and CPUSets for Resource Management +Written by David Rientjes + +This document describes how the numa=fake x86_64 command-line option can be used +in conjunction with cpusets for coarse memory management. Using this feature, +you can create fake NUMA nodes that represent contiguous chunks of memory and +assign them to cpusets and their attached tasks. This is a way of limiting the +amount of system memory that is available to a certain class of tasks. + +For more information on the features of cpusets, see Documentation/cpusets.txt. +There are a number of different configurations you can use for your needs.
For +more information on the numa=fake command line option and its various ways of +configuring fake nodes, see Documentation/x86_64/boot-options.txt. + +For the purposes of this introduction, we'll assume a very primitive NUMA +emulation setup of "numa=fake=4*512,". This will split our system memory into +four equal chunks of 512M each that we can now use to assign to cpusets. As +you become more familiar with using this combination for resource control, +you'll determine a better setup to minimize the number of nodes you have to deal +with. + +A machine may be split as follows with "numa=fake=4*512," as reported by dmesg: + + Faking node 0 at 0000000000000000-0000000020000000 (512MB) + Faking node 1 at 0000000020000000-0000000040000000 (512MB) + Faking node 2 at 0000000040000000-0000000060000000 (512MB) + Faking node 3 at 0000000060000000-0000000080000000 (512MB) + ... + On node 0 totalpages: 130975 + On node 1 totalpages: 131072 + On node 2 totalpages: 131072 + On node 3 totalpages: 131072 + +Now following the instructions for mounting the cpusets filesystem from +Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory +address spaces) to individual cpusets: + + [root@xroads /]# mkdir exampleset + [root@xroads /]# mount -t cpuset none exampleset + [root@xroads /]# mkdir exampleset/ddset + [root@xroads /]# cd exampleset/ddset + [root@xroads /exampleset/ddset]# echo 0-1 > cpus + [root@xroads /exampleset/ddset]# echo 0-1 > mems + +Now this cpuset, 'ddset', will only be allowed access to fake nodes 0 and 1 for +memory allocations (1G).
+ +You can now assign tasks to these cpusets to limit the memory resources +available to them according to the fake nodes assigned as mems: + + [root@xroads /exampleset/ddset]# echo $$ > tasks + [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G + [1] 13425 + +Notice the difference between the system memory usage as reported by +/proc/meminfo between the restricted cpuset case above and the unrestricted +case (i.e. running the same 'dd' command without assigning it to a fake NUMA +cpuset): + Unrestricted Restricted + MemTotal: 3091900 kB 3091900 kB + MemFree: 42113 kB 1513236 kB + +This allows for coarse memory management for the tasks you assign to particular +cpusets. Since cpusets can form a hierarchy, you can create some pretty +interesting combinations of use-cases for various classes of tasks for your +memory management needs. -- cgit v1.2.3 From 8f9aeca7a081d81c4c9862be1e04f15b5ab5461f Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Wed, 2 May 2007 19:27:10 +0200 Subject: [PATCH] x86: add command line length to boot protocol Because the command line is increased to 2048 characters after 2.6.21, it's not possible for boot loaders and userspace tools to determine the length of the command line the kernel can understand. The benefit of knowing the length is that users can be warned if the command line size is too long which prevents surprise if things don't work after bootup. This patch updates the boot protocol to contain a field called "cmdline_size" that contain the length of the command line (excluding the terminating zero). The patch also adds missing fields (of protocol version 2.05) to the x86_64 setup code. Signed-off-by: Bernhard Walle Signed-off-by: Andi Kleen Cc: Alon Bar-Lev Acked-by: H. 
Peter Anvin Cc: Andi Kleen Signed-off-by: Andrew Morton --- Documentation/i386/boot.txt | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 38fe1f03fb1..6498666ea33 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -2,7 +2,7 @@ ---------------------------- H. Peter Anvin - Last update 2007-01-26 + Last update 2007-03-06 On the i386 platform, the Linux kernel uses a rather complicated boot convention. This has evolved partially due to historical aspects, as @@ -35,9 +35,13 @@ Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible initrd address available to the bootloader. Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. + Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. Introduce relocatable_kernel and kernel_alignment fields. +Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of + the boot command line + **** MEMORY LAYOUT @@ -133,6 +137,8 @@ Offset Proto Name Meaning 022C/4 2.03+ initrd_addr_max Highest legal initrd address 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not +0235/3 N/A pad2 Unused +0238/4 2.06+ cmdline_size Maximum size of the kernel command line (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -233,6 +239,12 @@ filled out, however: if your ramdisk is exactly 131072 bytes long and this field is 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) + cmdline_size: + The maximum size of the command line without the terminating + zero. This means that the command line can contain at most + cmdline_size characters. With protocol version 2.05 and + earlier, the maximum size was 255. + **** THE KERNEL COMMAND LINE @@ -241,11 +253,10 @@ loader to communicate with the kernel. 
Some of its options are also relevant to the boot loader itself, see "special command line options" below. -The kernel command line is a null-terminated string currently up to -255 characters long, plus the final null. A string that is too long -will be automatically truncated by the kernel, a boot loader may allow -a longer command line to be passed to permit future kernels to extend -this limit. +The kernel command line is a null-terminated string. The maximum +length can be retrieved from the field cmdline_size. Before protocol +version 2.06, the maximum was 255 characters. A string that is too +long will be automatically truncated by the kernel. If the boot protocol version is 2.02 or later, the address of the kernel command line is given by the header field cmd_line_ptr (see -- cgit v1.2.3 From 1dbf527c51c6c20c19869c8125cb5b87c3d09506 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] i386: Make COMPAT_VDSO runtime selectable. Now that relocation of the VDSO for COMPAT_VDSO users is done at runtime rather than compile time, it is possible to enable/disable compat mode at runtime. This patch allows you to enable COMPAT_VDSO mode with "vdso=2" on the kernel command line, or via sysctl. (Switching on a running system shouldn't be done lightly; any process which was relying on the compat VDSO will be upset if it goes away.) The COMPAT_VDSO config option still exists, but if enabled it just makes vdso_enabled default to VDSO_COMPAT. +From: Hugh Dickins Fix oops from i386-make-compat_vdso-runtime-selectable.patch. Even mingetty at system startup finds it easy to trigger an oops while reading /proc/PID/maps: though it has a good hold on the mm itself, that cannot stop exit_mm() from resetting tsk->mm to NULL. (It is usually show_map()'s call to get_gate_vma() which oopses, and I expect we could change that to check priv->tail_vma instead; but no matter, even m_start()'s call just after get_task_mm() is racy.) 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Zachary Amsden Cc: "Jan Beulich" Cc: Eric W. Biederman Cc: Andi Kleen Cc: Ingo Molnar Cc: Roland McGrath --- Documentation/kernel-parameters.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 84c3bd05c63..4287696f18d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1820,6 +1820,7 @@ and is between 256 and 4096 characters. It is defined in the file [USBHID] The interval which mice are to be polled at. vdso= [IA-32,SH] + vdso=2: enable compat VDSO (default with COMPAT_VDSO) vdso=1: enable VDSO (default) vdso=0: disable VDSO mapping -- cgit v1.2.3 From f039b754714a422959027cb18bb33760eb8153f0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:12 +0200 Subject: [PATCH] x86: Don't use MWAIT on AMD Family 10 It doesn't put the CPU into deeper sleep states, so it's better to use the standard idle loop to save power. But allow to reenable it anyways for benchmarking. I also removed the obsolete idle=halt on i386 Cc: andreas.herrmann@amd.com Signed-off-by: Andi Kleen --- Documentation/kernel-parameters.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4287696f18d..94ce0d20253 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -695,8 +695,15 @@ and is between 256 and 4096 characters. It is defined in the file idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed See Documentation/ide.txt. - idle= [HW] - Format: idle=poll or idle=halt + idle= [X86] + Format: idle=poll or idle=mwait + Poll forces a polling idle loop that can slightly improve the performance + of waking up an idle CPU, but will use a lot of power and make the system + run hot. Not recommended.
+ idle=mwait: On systems which support MONITOR/MWAIT but where the kernel chose + not to use it because it doesn't save as much power as a normal idle + loop, use the MONITOR/MWAIT idle loop anyway. Performance should be the same + as idle=poll. ignore_loglevel [KNL] Ignore loglevel setting - this will print /all/ -- cgit v1.2.3 From b7fb4af06c18496950a45b365f7a09c47ea64c17 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:13 +0200 Subject: [PATCH] i386: Allow boot-time disable of SMP altinstructions Add "noreplace-smp" to disable SMP instruction replacement. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- Documentation/kernel-parameters.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 94ce0d20253..242b3a09b6c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1164,6 +1164,9 @@ and is between 256 and 4096 characters. It is defined in the file nomce [IA-32] Machine Check Exception + noreplace-smp [IA-32,SMP] Don't replace SMP instructions + with UP alternatives + noresidual [PPC] Don't use residual data on PReP machines. noresume [SWSUSP] Disables resume and restores original swap @@ -1569,6 +1572,9 @@ and is between 256 and 4096 characters. It is defined in the file smart2= [HW] Format: [,[,...,]] + smp-alt-once [IA-32,SMP] On a hotplug CPU system, only + attempt to substitute SMP alternatives once at boot. + snd-ad1816a= [HW,ALSA] snd-ad1848= [HW,ALSA] -- cgit v1.2.3 From 959b4fdfe7e27bcf101e2381e500e4076f2bb9ce Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 2 May 2007 19:27:16 +0200 Subject: [PATCH] i386: PARAVIRT: Allow boot-time disable of paravirt_ops patching Add "noreplace-paravirt" to disable paravirt_ops patching.
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Andi Kleen Signed-off-by: Andrew Morton --- Documentation/kernel-parameters.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 242b3a09b6c..38d7db3262c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -64,6 +64,7 @@ parameter is applicable: GENERIC_TIME The generic timeofday code is enabled. NFS Appropriate NFS support is enabled. OSS OSS sound support is enabled. + PV_OPS A paravirtualized kernel PARIDE The ParIDE subsystem is enabled. PARISC The PA-RISC architecture is enabled. PCI PCI bus support is enabled. @@ -1164,6 +1165,8 @@ and is between 256 and 4096 characters. It is defined in the file nomce [IA-32] Machine Check Exception + noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops + noreplace-smp [IA-32,SMP] Don't replace SMP instructions with UP alternatives -- cgit v1.2.3 From 8a336b0a4b6dfacc8cc5fd617ba1e1904077de2d Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Wed, 2 May 2007 19:27:19 +0200 Subject: [PATCH] x86-64: Dynamically adjust machine check interval Background: We've found that MCEs (specifically DRAM SBEs) tend to come in bunches, especially when we are trying really hard to stress the system out. The current MCE poller uses a static interval which does not care whether it has or has not found MCEs recently. Description: This patch makes the MCE poller adjust the polling interval dynamically. If we find an MCE, poll 2x faster (down to 10 ms). When we stop finding MCEs, poll 2x slower (up to check_interval seconds). The check_interval tunable becomes the max polling interval. The "Machine check events logged" printk() is rate limited to the check_interval, which should be identical behavior to the old functionality. 
Result: If you start to take a lot of correctable errors (not exceptions), you log them faster and more accurately (less chance of overflowing the MCA registers). If you don't take a lot of errors, you will see no change. Alternatives: I considered simply reducing the polling interval to 10 ms immediately and keeping it there as long as we continue to find errors. This felt a bit heavy handed, but does perform significantly better for the default check_interval of 5 minutes (we're using a few seconds when testing for DRAM errors). I could be convinced to go with this, if anyone felt it was not too aggressive. Testing: I used an error-injecting DIMM to create lots of correctable DRAM errors and verified that the polling interval accelerates. The printk() only happens once per check_interval seconds. Patch: This patch is against 2.6.21-rc7. Signed-Off-By: Tim Hockin Signed-off-by: Andi Kleen --- Documentation/x86_64/machinecheck | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck index 068a6d9904b..feaeaf6f6e4 100644 --- a/Documentation/x86_64/machinecheck +++ b/Documentation/x86_64/machinecheck @@ -36,7 +36,12 @@ between all CPUs. check_interval How often to poll for corrected machine check errors, in seconds - (Note output is hexademical). Default 5 minutes. + (Note output is hexademical). Default 5 minutes. When the poller + finds MCEs it triggers an exponential speedup (poll more often) on + the polling interval. When the poller stops finding MCEs, it + triggers an exponential backoff (poll less often) on the polling + interval. The check_interval variable is both the initial and + maximum polling interval. tolerant Tolerance level. 
When a machine check exception occurs for a non -- cgit v1.2.3 From b466004a660c490f3bfb12be8b5ca18bfc393261 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 2 May 2007 19:27:21 +0200 Subject: [PATCH] x86-64: Don't exclude asm-offsets.c in Documentation/dontdiff asm-offsets.c is valid source code and needs to be diffed. Signed-off-by: Andi Kleen --- Documentation/dontdiff | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 63c2d0c55aa..64e9f6c4826 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -55,8 +55,8 @@ aic7*seq.h* aicasm aicdb.h* asm -asm-offsets.* -asm_offsets.* +asm-offsets.h +asm_offsets.h autoconf.h* bbootsect bin2c -- cgit v1.2.3 From ded2e1640ffaee26c054a42e5210c1086fb1d8eb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 20 Mar 2007 09:47:47 -0500 Subject: kbuild: small documentation fix in Documentation/kbuild/modules.txt The Makefile fragment in Documentation/kbuild/modules.txt looks to be missing some braces. 
Signed-off-by: Anton Blanchard Signed-off-by: Sam Ravnborg --- Documentation/kbuild/modules.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt index 769ee05ee4d..1d247d59ad5 100644 --- a/Documentation/kbuild/modules.txt +++ b/Documentation/kbuild/modules.txt @@ -249,7 +249,7 @@ following files: --> filename: Makefile KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) -C $KERNELDIR M=`pwd` $@ + $(MAKE) -C $(KERNELDIR) M=`pwd` $@ # Module specific targets genbin: -- cgit v1.2.3 From e711db3edfe7c0e32b6430e7d041905f856aa79a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 13 Apr 2007 23:07:00 +0200 Subject: kbuild: fix make mrproper for Documentation/DocBook/man "make mandocs" generates > 2000 files in Documentation/DocBook/man and this caused kbuild to barf out during make mrproper like this: make -f scripts/Makefile.clean obj=Documentation/DocBook make -f scripts/Makefile.clean obj=Documentation/DocBook/man/ make[2]: execvp: /bin/sh: Argument list too long make[2]: *** [__clean] Error 127 make[1]: *** [Documentation/DocBook/man/] Error 2 make: *** [_mrproper_Documentation/DocBook] Error 2 The man directory was solely used for output so the fix is to remove it entirely during the make mrproper process. 
Signed-off-by: Sam Ravnborg Acked-by: Randy Dunlap --- Documentation/DocBook/Makefile | 7 ++----- Documentation/DocBook/man/Makefile | 3 --- 2 files changed, 2 insertions(+), 8 deletions(-) delete mode 100644 Documentation/DocBook/man/Makefile (limited to 'Documentation') diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 867608ab3ca..960f4025ee8 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -152,6 +152,7 @@ quiet_cmd_db2man = MAN $@ @(which xmlto > /dev/null 2>&1) || \ (echo "*** You need to install xmlto ***"; \ exit 1) + $(Q)mkdir -p $(obj)/man $(call cmd,db2man) @touch $@ @@ -212,11 +213,7 @@ clean-files := $(DOCBOOKS) \ $(patsubst %.xml, %.9, $(DOCBOOKS)) \ $(C-procfs-example) -clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) - -#man put files in man subdir - traverse down -subdir- := man/ - +clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable se we can use it in if_changed and friends. diff --git a/Documentation/DocBook/man/Makefile b/Documentation/DocBook/man/Makefile deleted file mode 100644 index 4fb7ea0f7ac..00000000000 --- a/Documentation/DocBook/man/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# Rules are put in Documentation/DocBook - -clean-files := *.9.gz *.sgml manpage.links manpage.refs -- cgit v1.2.3 From f15a3ccdc800cef08b346fef5f96860a05e7a3fa Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Apr 2007 08:44:12 -0700 Subject: kernel-doc: alphabetically-sorted entries in index.html of 'htmldocs' Make docbook index.html contain sorted output. I prefer to let the computer do it. This also avoids people not reading the comment(s). 
Signed-off-by: Randy Dunlap Signed-off-by: Sam Ravnborg --- Documentation/DocBook/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 960f4025ee8..10b5cd6c54a 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -41,7 +41,7 @@ psdocs: $(PS) PDF := $(patsubst %.xml, %.pdf, $(BOOKS)) pdfdocs: $(PDF) -HTML := $(patsubst %.xml, %.html, $(BOOKS)) +HTML := $(sort $(patsubst %.xml, %.html, $(BOOKS))) htmldocs: $(HTML) MAN := $(patsubst %.xml, %.9, $(BOOKS)) -- cgit v1.2.3 From 3265b54556b2d8ed4e9612b08edb592b60205c40 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 1 May 2007 11:00:19 +0200 Subject: DOC: Fix wrong identifier name in Documentation/driver-model/devres.txt Above and below we talk about my_midlayer_create_something, I assume that is also meant here. Signed-off-by: Rolf Eike Beer Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-model/devres.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 5163b85308f..6c8d8f27db3 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -182,7 +182,7 @@ For example, you can do something like the following. ... - devres_close_group(dev, my_midlayer_something); + devres_close_group(dev, my_midlayer_create_something); return 0; } -- cgit v1.2.3 From 6ba186361ed2cda7e174856a3ab8a8e3237b3c3d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 7 Apr 2007 17:21:28 +0200 Subject: PCI: Require vendor and device for new_id Currently, there is no minimum number of fields required when adding a new device ID to a PCI driver through the new_id sysfs file. 
It is possible to add a new ID with only the vendor ID set, causing the driver to attempt to attach to all PCI devices from that vendor. This has been reported to happen accidentally: http://lists.lm-sensors.org/pipermail/lm-sensors/2007-March/019366.html It is even possible to not even set the vendor ID field, causing the driver to attempt to attach to _all_ the PCI devices. This sounds dangerous and I fail to see any valid use of this "feature". Thus I suggest that we now require at least the first two fields (vendor ID and device ID) to be set. For what it's worth, this is what the USB subsystem does. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/pci.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/pci.txt b/Documentation/pci.txt index cdf2f3c0ab1..a8ded1a8bd6 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -163,9 +163,9 @@ echo "vendor device subvendor subdevice class class_mask driver_data" > \ /sys/bus/pci/drivers/{driver}/new_id All fields are passed in as hexadecimal values (no leading 0x). -Users need pass only as many fields as necessary: - o vendor, device, subvendor, and subdevice fields default - to PCI_ANY_ID (FFFFFFFF), +The vendor and device fields are mandatory, the others are optional. Users +need pass only as many optional fields as necessary: + o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF) o class and classmask fields default to 0 o driver_data defaults to 0UL. -- cgit v1.2.3 From 54eee4c5bf553ad54ba200d00487b61eb6b155f6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Apr 2007 21:35:39 -0700 Subject: PCI Documentation: power/pci.txt fix copy/paste error Correct function name copy-paste error. 
Signed-off-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- Documentation/power/pci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index b6a3cbf7e84..e00b099a4b8 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -203,7 +203,7 @@ resume Usage: -if (dev->driver && dev->driver->suspend) +if (dev->driver && dev->driver->resume) dev->driver->resume(dev) The resume callback may be called from any power state, and is always meant to -- cgit v1.2.3 From 5adc55da4a7758021bcc374904b0f8b076508a11 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 27 Mar 2007 03:02:51 +0200 Subject: PCI: remove the broken PCI_MULTITHREAD_PROBE option This patch removes the PCI_MULTITHREAD_PROBE option that had already been marked as broken. Signed-off-by: Adrian Bunk Signed-off-by: Greg Kroah-Hartman --- Documentation/pci.txt | 4 ---- 1 file changed, 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/pci.txt b/Documentation/pci.txt index a8ded1a8bd6..40c4717d236 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -124,10 +124,6 @@ initialization with a pointer to a structure describing the driver err_handler See Documentation/pci-error-recovery.txt - multithread_probe Enable multi-threaded probe/scan. Driver must - provide its own locking/syncronization for init - operations if this is enabled. - The ID table is an array of struct pci_device_id entries ending with an all-zero entry. Each entry consists of: -- cgit v1.2.3 From ecf36501bc4ad399e6df2e0bdaa513a2d510b7ec Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 6 Apr 2007 12:19:48 +0200 Subject: PCI: the overdue removal of pci_module_init() Unless we finally completely remove it, people will always add new users. 
Signed-off-by: Adrian Bunk Signed-off-by: Greg Kroah-Hartman --- Documentation/feature-removal-schedule.txt | 7 ------- Documentation/pci.txt | 2 -- 2 files changed, 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5c88ba1ea26..144058cf849 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -117,13 +117,6 @@ Who: Adrian Bunk --------------------------- -What: pci_module_init(driver) -When: January 2007 -Why: Is replaced by pci_register_driver(pci_driver). -Who: Richard Knutsson and Greg Kroah-Hartman - ---------------------------- - What: Usage of invalid timevals in setitimer When: March 2007 Why: POSIX requires to validate timevals in the setitimer call. This diff --git a/Documentation/pci.txt b/Documentation/pci.txt index 40c4717d236..e2c9d0a0c43 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -545,8 +545,6 @@ pci_find_slot() Find pci_dev corresponding to given bus and pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) pci_find_capability() Find specified capability in device's capability list. -pci_module_init() Inline helper function for ensuring correct - pci_driver initialization and error handling. pci_resource_start() Returns bus start address for a given PCI region pci_resource_end() Returns bus end address for a given PCI region pci_resource_len() Returns the byte length of a PCI region -- cgit v1.2.3 From 52706ec903dcc7679acf5b93400d68fbc5384553 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Fri, 4 May 2007 18:47:50 +0200 Subject: [S390] cio: Deprecate read_dev_chars() and read_conf_data{,_lpm}(). These helper functions are a leftover from 2.4 sync I/O and are a notorious source for bugs. They lead to device driver specific code creeping into cio, and some issues can't really be fixed at all. 
Device drivers can easily implement those functions themselves in a more robust manner, so let's get rid of them. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- Documentation/feature-removal-schedule.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5c88ba1ea26..1a9e600a73a 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -314,3 +314,20 @@ Why: Code was merged, then submitter immediately disappeared leaving Who: David S. Miller --------------------------- + +What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer) +When: December 2007 +Why: These functions are a leftover from 2.4 times. They have several + problems: + - Duplication of checks that are done in the device driver's + interrupt handler + - common I/O layer can't do device specific error recovery + - device driver can't be notified for conditions happening during + execution of the function + Device drivers should issue the read device characteristics and read + configuration data ccws and do the appropriate error handling + themselves. +Who: Cornelia Huck + +--------------------------- + -- cgit v1.2.3 From 8bc8493063f938c932819958a7b5a0d56046bc96 Mon Sep 17 00:00:00 2001 From: Stuart MacDonald Date: Fri, 4 May 2007 16:00:03 -0400 Subject: MAINTAINER change for Connect Tech Inc I am no longer with CTI. The Support Department will handle all inquiries regarding the WH. 
Signed-off-by: Stuart MacDonald Signed-off-by: Linus Torvalds --- Documentation/usb/usb-serial.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt index d61f6e7865d..b18e86a2250 100644 --- a/Documentation/usb/usb-serial.txt +++ b/Documentation/usb/usb-serial.txt @@ -42,7 +42,7 @@ ConnectTech WhiteHEAT 4 port converter http://www.connecttech.com For any questions or problems with this driver, please contact - Stuart MacDonald at stuartm@connecttech.com + Connect Tech's Support Department at support@connecttech.com HandSpring Visor, Palm USB, and Clié USB driver -- cgit v1.2.3 From d29c91c70bc7790b112119135fae7690cbf17577 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 14 Mar 2007 14:25:49 +0900 Subject: doc: Update sysrq doc for sh kgdb trigger. sh uses the same sysrq trigger as ppc, update the documentation to reflect that. Signed-off-by: Paul Mundt --- Documentation/sysrq.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index d43aa9d3c10..ba328f25541 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -1,6 +1,6 @@ Linux Magic System Request Key Hacks Documentation for sysrq.c -Last update: 2007-JAN-06 +Last update: 2007-MAR-14 * What is the magic SysRq key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -75,7 +75,7 @@ On all - write a character to /proc/sysrq-trigger. e.g.: 'f' - Will call oom_kill to kill a memory hog process. -'g' - Used by kgdb on ppc platforms. +'g' - Used by kgdb on ppc and sh platforms. 'h' - Will display help (actually any other key than those listed above will display help. but 'h' is easy to remember :-) -- cgit v1.2.3 From 1929cb340b74904c130fdf3de3fe5bbedb68a5aa Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Tue, 24 Apr 2007 13:39:09 +0900 Subject: sh: SH7722 clock framework support. 
This adds support for the SH7722 (MobileR) to the clock framework. Signed-off-by: dmitry pervushin Signed-off-by: Paul Mundt --- Documentation/sh/clk.txt | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 Documentation/sh/clk.txt (limited to 'Documentation') diff --git a/Documentation/sh/clk.txt b/Documentation/sh/clk.txt new file mode 100644 index 00000000000..9aef710e9a4 --- /dev/null +++ b/Documentation/sh/clk.txt @@ -0,0 +1,32 @@ +Clock framework on SuperH architecture + +The framework on SH extends existing API by the function clk_set_rate_ex, +which prototype is as follows: + + clk_set_rate_ex (struct clk *clk, unsigned long rate, int algo_id) + +The algo_id parameter is used to specify algorithm used to recalculate clocks, +adjanced to clock, specified as first argument. It is assumed that algo_id==0 +means no changes to adjanced clock + +Internally, the clk_set_rate_ex forwards request to clk->ops->set_rate method, +if it is present in ops structure. The method should set the clock rate and adjust +all needed clocks according to the passed algo_id. +Exact values for algo_id are machine-dependend. For the sh7722, the following +values are defined: + + NO_CHANGE = 0, + IUS_N1_N1, /* I:U = N:1, U:Sh = N:1 */ + IUS_322, /* I:U:Sh = 3:2:2 */ + IUS_522, /* I:U:Sh = 5:2:2 */ + IUS_N11, /* I:U:Sh = N:1:1 */ + SB_N1, /* Sh:B = N:1 */ + SB3_N1, /* Sh:B3 = N:1 */ + SB3_32, /* Sh:B3 = 3:2 */ + SB3_43, /* Sh:B3 = 4:3 */ + SB3_54, /* Sh:B3 = 5:4 */ + BP_N1, /* B:P = N:1 */ + IP_N1 /* I:P = N:1 */ + +Each of these constants means relation between clocks that can be set via the FRQCR +register -- cgit v1.2.3 From 6179b5562d5d17c7c09b54cb11dd925ca308d7a9 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Sun, 6 May 2007 14:48:44 -0700 Subject: add new_id to PCMCIA drivers PCI drivers have the new_id file in sysfs which allows new IDs to be added at runtime. 
The advantage is to avoid re-compilation of a driver that works for a new device, but its ID table doesn't contain the new device. This mechanism is only meant for testing; after the driver has been tested successfully, the ID should be added in source code so that new revisions of the kernel automatically detect the device. The implementation follows the PCI implementation. The interface is documented in Documentation/pcmcia/driver.txt. Computations should be done in userspace, so the sysfs string contains the raw structure members for matching. Signed-off-by: Bernhard Walle Cc: Dominik Brodowski Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/pcmcia/driver.txt | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 Documentation/pcmcia/driver.txt (limited to 'Documentation') diff --git a/Documentation/pcmcia/driver.txt b/Documentation/pcmcia/driver.txt new file mode 100644 index 00000000000..0ac16792077 --- /dev/null +++ b/Documentation/pcmcia/driver.txt @@ -0,0 +1,30 @@ +PCMCIA Driver +------------- + + +sysfs +----- + +New PCMCIA IDs may be added to a device driver pcmcia_device_id table at +runtime as shown below: + +echo "match_flags manf_id card_id func_id function device_no \ +prod_id_hash[0] prod_id_hash[1] prod_id_hash[2] prod_id_hash[3]" > \ +/sys/bus/pcmcia/drivers/{driver}/new_id + +All fields are passed in as hexadecimal values (no leading 0x). +The meaning is described in the PCMCIA specification, the match_flags is +a bitwise or-ed combination from PCMCIA_DEV_ID_MATCH_* constants +defined in include/linux/mod_devicetable.h. + +Once added, the driver probe routine will be invoked for any unclaimed +PCMCIA device listed in its (newly updated) pcmcia_device_id list. + +A common use-case is to add a new device according to the manufacturer ID +and the card ID (form the manf_id and card_id file in the device tree). 
+For this, just use: + +echo "0x3 manf_id card_id 0 0 0 0 0 0 0" > \ + /sys/bus/pcmcia/drivers/{driver}/new_id + +after loading the driver. -- cgit v1.2.3 From b813e931b4c8235bb42e301096ea97dbdee3e8fe Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sun, 6 May 2007 14:49:24 -0700 Subject: smaps: add clear_refs file to clear reference Adds /proc/pid/clear_refs. When any non-zero number is written to this file, pte_mkold() and ClearPageReferenced() are called for each pte and its corresponding page, respectively, in that task's VMAs. This file is only writable by the user who owns the task. It is now possible to measure _approximately_ how much memory a task is using by clearing the reference bits with echo 1 > /proc/pid/clear_refs and checking the reference count for each VMA from the /proc/pid/smaps output at a measured time interval. For example, to observe the approximate change in memory footprint for a task, write a script that clears the references (echo 1 > /proc/pid/clear_refs), sleeps, and then greps for Pgs_Referenced and extracts the size in kB. Add the sizes for each VMA together for the total referenced footprint. Moments later, repeat the process and observe the difference. For example, using an efficient Mozilla: accumulated time referenced memory ---------------- ----------------- 0 s 408 kB 1 s 408 kB 2 s 556 kB 3 s 1028 kB 4 s 872 kB 5 s 1956 kB 6 s 416 kB 7 s 1560 kB 8 s 2336 kB 9 s 1044 kB 10 s 416 kB This is a valuable tool to get an approximate measurement of the memory footprint for a task. 
Cc: Hugh Dickins Cc: Paul Mundt Cc: Christoph Lameter Signed-off-by: David Rientjes [akpm@linux-foundation.org: build fixes] [mpm@selenic.com: rename for_each_pmd] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 7aaf09b86a5..3f4b226572e 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -122,21 +122,22 @@ subdirectory has the entries listed in Table 1-1. Table 1-1: Process specific entries in /proc .............................................................................. - File Content - cmdline Command line arguments - cpu Current and last cpu in which it was executed (2.4)(smp) - cwd Link to the current working directory - environ Values of environment variables - exe Link to the executable of this process - fd Directory, which contains all file descriptors - maps Memory maps to executables and library files (2.4) - mem Memory held by this process - root Link to the root directory of this process - stat Process status - statm Process memory status information - status Process status in human readable form - wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan - smaps Extension based on maps, presenting the rss size for each mapped file + File Content + clear_refs Clears page referenced bits shown in smaps output + cmdline Command line arguments + cpu Current and last cpu in which it was executed (2.4)(smp) + cwd Link to the current working directory + environ Values of environment variables + exe Link to the executable of this process + fd Directory, which contains all file descriptors + maps Memory maps to executables and library files (2.4) + mem Memory held by this process + root Link to the root directory of this process + stat Process status + statm Process 
memory status information + status Process status in human readable form + wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + smaps Extension based on maps, the rss size for each mapped file .............................................................................. For example, to get the status information of a process, all you have to do is -- cgit v1.2.3 From 352434211dad370316155d90d7dab590519f465b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 6 May 2007 14:49:47 -0700 Subject: slub: user documentation Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/slub.txt | 113 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 Documentation/vm/slub.txt (limited to 'Documentation') diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt new file mode 100644 index 00000000000..727c8d81aea --- /dev/null +++ b/Documentation/vm/slub.txt @@ -0,0 +1,113 @@ +Short users guide for SLUB +-------------------------- + +First of all slub should transparently replace SLAB. If you enable +SLUB then everything should work the same (Note the word "should". +There is likely not much value in that word at this point). + +The basic philosophy of SLUB is very different from SLAB. SLAB +requires rebuilding the kernel to activate debug options for all +SLABS. SLUB always includes full debugging but its off by default. +SLUB can enable debugging only for selected slabs in order to avoid +an impact on overall system performance which may make a bug more +difficult to find. + +In order to switch debugging on one can add a option "slub_debug" +to the kernel command line. That will enable full debugging for +all slabs. + +Typically one would then use the "slabinfo" command to get statistical +data and perform operation on the slabs. By default slabinfo only lists +slabs that have data in them. See "slabinfo -h" for more options when +running the command. 
slabinfo can be compiled with + +gcc -o slabinfo Documentation/vm/slabinfo.c + +Some of the modes of operation of slabinfo require that slub debugging +be enabled on the command line. F.e. no tracking information will be +available without debugging on and validation can only partially +be performed if debugging was not switched on. + +Some more sophisticated uses of slub_debug: +------------------------------------------- + +Parameters may be given to slub_debug. If none is specified then full +debugging is enabled. Format: + +slub_debug= Enable options for all slabs +slub_debug=, + Enable options only for select slabs + +Possible debug options are + F Sanity checks on (enables SLAB_DEBUG_FREE. Sorry + SLAB legacy issues) + Z Red zoning + P Poisoning (object and padding) + U User tracking (free and alloc) + T Trace (please only use on single slabs) + +F.e. in order to boot just with sanity checks and red zoning one would specify: + + slub_debug=FZ + +Trying to find an issue in the dentry cache? Try + + slub_debug=,dentry_cache + +to only enable debugging on the dentry cache. + +Red zoning and tracking may realign the slab. We can just apply sanity checks +to the dentry cache with + + slub_debug=F,dentry_cache + +In case you forgot to enable debugging on the kernel command line: It is +possible to enable debugging manually when the kernel is up. Look at the +contents of: + +/sys/slab// + +Look at the writable files. Writing 1 to them will enable the +corresponding debug option. All options can be set on a slab that does +not contain objects. If the slab already contains objects then sanity checks +and tracing may only be enabled. The other options may cause the realignment +of objects. + +Careful with tracing: It may spew out lots of information and never stop if +used on the wrong slab. + +SLAB Merging +------------ + +If no debugging is specified then SLUB may merge similar slabs together +in order to reduce overhead and increase cache hotness of objects. 
+slabinfo -a displays which slabs were merged together. + +Getting more performance +------------------------ + +To some degree SLUB's performance is limited by the need to take the +list_lock once in a while to deal with partial slabs. That overhead is +governed by the order of the allocation for each slab. The allocations +can be influenced by kernel parameters: + +slub_min_objects=x (default 8) +slub_min_order=x (default 0) +slub_max_order=x (default 4) + +slub_min_objects allows to specify how many objects must at least fit +into one slab in order for the allocation order to be acceptable. +In general slub will be able to perform this number of allocations +on a slab without consulting centralized resources (list_lock) where +contention may occur. + +slub_min_order specifies a minim order of slabs. A similar effect like +slub_min_objects. + +slub_max_order specified the order at which slub_min_objects should no +longer be checked. This is useful to avoid SLUB trying to generate +super large order pages to fit slub_min_objects of a slab cache with +large object sizes into one high order page. + + +Christoph Lameter, , April 10, 2007 -- cgit v1.2.3 From c09d87517298fd01543739ba26987645deb4e6a9 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 6 May 2007 14:49:48 -0700 Subject: slub: add slabinfo tool Add the tool which gets reports about slabs to the VM documentation directory. 
/*
 * Patch metadata that precedes the file body in the original document,
 * kept verbatim:
 *
 * Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/slabinfo.c | 943 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 943 insertions(+) create mode 100644 Documentation/vm/slabinfo.c (limited to 'Documentation') diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c new file mode 100644 index 00000000000..41710ccf3a2 --- /dev/null +++ b/Documentation/vm/slabinfo.c @@ -0,0 +1,943 @@
 */

/*
 * Slabinfo: Tool to get reports about slabs
 *
 * (C) 2007 sgi, Christoph Lameter
 *
 * Compile by:
 *
 * gcc -o slabinfo slabinfo.c
 */

/* readlink(), DT_LNK/DT_DIR and getpagesize() are hidden by strict -std= modes. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <stdarg.h>
#include <getopt.h>
#include <regex.h>

#define MAX_SLABS 500
#define MAX_ALIASES 500
#define MAX_NODES 1024

/* One entry per directory found in /sys/slab; fields mirror the sysfs attributes. */
struct slabinfo {
	char *name;
	int alias;
	int refs;		/* number of aliases that resolve to this slab */
	int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
	int hwcache_align, object_size, objs_per_slab;
	int sanity_checks, slab_size, store_user, trace;
	int order, poison, reclaim_account, red_zone;
	unsigned long partial, objects, slabs;
	int numa[MAX_NODES];		/* per-node counts parsed from "slabs" */
	int numa_partial[MAX_NODES];	/* per-node counts parsed from "partial" */
} slabinfo[MAX_SLABS];

/* One entry per symlink found in /sys/slab. */
struct aliasinfo {
	char *name;		/* name of the symlink */
	char *ref;		/* last path component of the symlink target */
	struct slabinfo *slab;	/* resolved by link_slabs() */
} aliasinfo[MAX_ALIASES];

int slabs = 0;
int aliases = 0;
int alias_targets = 0;
int highest_node = 0;

/* Scratch buffer shared by read_obj() and its callers. */
char buffer[4096];

int show_alias = 0;
int show_slab = 0;
int skip_zero = 1;
int show_numa = 0;
int show_track = 0;
int show_first_alias = 0;
int validate = 0;
int shrink = 0;
int show_inverted = 0;
int show_single_ref = 0;
int show_totals = 0;
int sort_size = 0;

int page_size;

regex_t pattern;

/*
 * Print a printf-style message to stderr and exit with status 1.
 */
void fatal(const char *x, ...)
{
	va_list ap;

	va_start(ap, x);
	vfprintf(stderr, x, ap);
	va_end(ap);
	exit(1);
}

/*
 * Duplicate a string, terminating the program if memory is exhausted.
 */
static char *xstrdup(const char *s)
{
	size_t len = strlen(s) + 1;
	char *copy = malloc(len);

	if (!copy)
		fatal("Out of memory\n");
	memcpy(copy, s, len);
	return copy;
}

/*
 * Print the option summary.  The option column is padded with a width
 * specifier so the help texts line up.
 */
void usage(void)
{
	static const struct {
		const char *option;
		const char *help;
	} lines[] = {
		{ "-a|--aliases", "Show aliases" },
		{ "-h|--help", "Show usage information" },
		{ "-n|--numa", "Show NUMA information" },
		{ "-s|--shrink", "Shrink slabs" },
		{ "-v|--validate", "Validate slabs" },
		{ "-t|--tracking", "Show alloc/free information" },
		{ "-T|--Totals", "Show summary information" },
		{ "-l|--slabs", "Show slabs" },
		{ "-S|--Size", "Sort by size" },
		{ "-z|--zero", "Include empty slabs" },
		{ "-f|--first-alias", "Show first alias" },
		{ "-i|--inverted", "Inverted list" },
		{ "-1|--1ref", "Single reference" },
	};
	size_t i;

	printf("slabinfo [-1afhilnstvzST] [slab-regexp]\n");
	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++)
		printf("%-20s %s\n", lines[i].option, lines[i].help);
}

/*
 * Read the first line of the file NAME into the global buffer, without
 * its trailing newline.  Returns the length of the stored text; 0 means
 * the file could not be read or was empty.
 */
unsigned long read_obj(const char *name)
{
	FILE *f = fopen(name, "r");
	size_t len;

	buffer[0] = 0;
	if (f) {
		if (!fgets(buffer, sizeof(buffer), f))
			buffer[0] = 0;
		fclose(f);
	}

	len = strlen(buffer);
	/* The newline is the last character, not the terminator itself. */
	if (len && buffer[len - 1] == '\n')
		buffer[--len] = 0;
	return len;
}

/*
 * Read attribute PATH_NAME of slab S (i.e. "<s->name>/<name>") into the
 * global buffer.  Returns the same value as read_obj().
 */
static unsigned long read_slab_obj(const struct slabinfo *s, const char *name)
{
	char path[256];
	int len;

	len = snprintf(path, sizeof(path), "%s/%s", s->name, name);
	if (len < 0 || (size_t)len >= sizeof(path))
		return 0;
	return read_obj(path);
}

/*
 * Get the contents of an attribute as a number (0 if unreadable).
 */
unsigned long get_obj(const char *name)
{
	if (!read_obj(name))
		return 0;

	return strtoul(buffer, NULL, 10);
}

/*
 * Get the leading number of an attribute and, in *x, a heap copy of
 * whatever follows it.  *x is NULL when nothing follows the number or the
 * attribute is unreadable; otherwise the caller frees it.
 */
unsigned long get_obj_and_str(const char *name, char **x)
{
	unsigned long result = 0;
	char *p;

	*x = NULL;

	if (!read_obj(name))
		return 0;

	result = strtoul(buffer, &p, 10);
	while (*p == ' ')
		p++;
	if (*p)
		*x = xstrdup(p);
	return result;
}

/*
 * Write the number N to attribute NAME of slab S.  Failing to open the
 * attribute is fatal; a failed write is reported on stderr.
 */
void set_obj(struct slabinfo *s, const char *name, int n)
{
	char x[256];
	FILE *f;
	int len;

	len = snprintf(x, sizeof(x), "%s/%s", s->name, name);
	if (len < 0 || (size_t)len >= sizeof(x))
		fatal("Path too long for slab %s\n", s->name);

	f = fopen(x, "w");
	if (!f)
		fatal("Cannot write to %s\n", x);

	fprintf(f, "%d\n", n);
	/* Buffered data reaches the file at close time, so check it. */
	if (fclose(f))
		fprintf(stderr, "Failed writing %d to %s\n", n, x);
}

/*
 * Put a size string together: values above 1000 are shown with one
 * decimal and a K/M/G suffix (e.g. 1234567 -> "1.2M").  Returns the
 * length of the string.  The buffer must hold at least 16 bytes.
 */
int store_size(char *buffer, unsigned long value)
{
	unsigned long divisor = 1;
	char trailer = 0;
	int n;

	/* Each divisor is a tenth of the unit so one decimal digit survives. */
	if (value > 1000000000UL) {
		divisor = 100000000UL;
		trailer = 'G';
	} else if (value > 1000000UL) {
		divisor = 100000UL;
		trailer = 'M';
	} else if (value > 1000UL) {
		divisor = 100;
		trailer = 'K';
	}

	value /= divisor;
	n = sprintf(buffer, "%lu", value);
	if (trailer) {
		buffer[n] = trailer;
		n++;
		buffer[n] = 0;
	}
	if (divisor != 1) {
		/* Shift "<digit><suffix>\0" right by one to make room for '.'. */
		memmove(buffer + n - 2, buffer + n - 3, 4);
		buffer[n-2] = '.';
		n++;
	}
	return n;
}

/*
 * Parse a list of the form "N0=12 N1=3 ..." into numa[], which is
 * cleared first.  A NULL list leaves numa[] zeroed.  Node numbers outside
 * the table are ignored.  Updates highest_node.
 */
void decode_numa_list(int *numa, char *t)
{
	unsigned long node;
	unsigned long nr;

	memset(numa, 0, MAX_NODES * sizeof(int));
	if (!t)
		return;

	while (*t == 'N') {
		t++;
		node = strtoul(t, &t, 10);
		if (*t == '=') {
			t++;
			nr = strtoul(t, &t, 10);
			if (node < MAX_NODES) {
				numa[node] = nr;
				if ((int)node > highest_node)
					highest_node = node;
			}
		}
		while (*t == ' ')
			t++;
	}
}

/* Ask the kernel to validate slab S by writing 1 to its "validate" attribute. */
void slab_validate(struct slabinfo *s)
{
	set_obj(s, "validate", 1);
}

/* Ask the kernel to shrink slab S by writing 1 to its "shrink" attribute. */
void slab_shrink(struct slabinfo *s)
{
	set_obj(s, "shrink", 1);
}

/* Number of report rows printed so far; the header is printed before row 0. */
int line = 0;

/*
 * Print the column header of the slab report.  The widths are those used
 * for the data rows in slabcache().
 */
void first_line(void)
{
	printf("%-21s %8s %7s %8s %14s %4s %1s %3s %3s %s\n",
		"Name", "Objects", "Objsize", "Space", "Slabs/Part/Cpu",
		"O/S", "O", "%Fr", "%Ef", "Flg");
}

/*
 * Pick one alias to stand for a slab.  Among the aliases of FIND the one
 * with the longest name seen so far is kept, and the search stops as soon
 * as the kept alias starts with "kmall" (the kmalloc caches).  Terminates
 * the program if FIND has no alias at all.
 */
struct aliasinfo *find_one_alias(struct slabinfo *find)
{
	struct aliasinfo *a;
	struct aliasinfo *best = NULL;

	for (a = aliasinfo; a < aliasinfo + aliases; a++) {
		if (a->slab == find &&
			(!best || strlen(best->name) < strlen(a->name))) {
				best = a;
				if (strncmp(a->name, "kmall", 5) == 0)
					return best;
		}
	}
	if (best)
		return best;
	fatal("Cannot find alias for %s\n", find->name);
	return NULL;
}

/* Bytes occupied by all slabs of S: slab count times (page_size << order). */
unsigned long slab_size(const struct slabinfo *s)
{
	return s->slabs * (page_size << s->order);
}

/*
 * Print one row of the slab report for S.  Empty caches are skipped
 * unless skip_zero is cleared.
 */
void slabcache(struct slabinfo *s)
{
	char size_str[20];
	char dist_str[40];
	char flags[20];
	char *p = flags;
	unsigned long total = slab_size(s);

	if (skip_zero && !s->slabs)
		return;

	store_size(size_str, total);
	snprintf(dist_str, sizeof(dist_str), "%lu/%lu/%d",
		s->slabs, s->partial, s->cpu_slabs);

	if (!line++)
		first_line();

	if (s->aliases)
		*p++ = '*';
	if (s->cache_dma)
		*p++ = 'd';
	if (s->hwcache_align)
		*p++ = 'A';
	if (s->poison)
		*p++ = 'P';
	if (s->reclaim_account)
		*p++ = 'a';
	if (s->red_zone)
		*p++ = 'Z';
	if (s->sanity_checks)
		*p++ = 'F';
	if (s->store_user)
		*p++ = 'U';
	if (s->trace)
		*p++ = 'T';

	*p = 0;
	/* %Fr: partial slabs per 100 slabs; %Ef: object bytes per 100 slab bytes. */
	printf("%-21s %8lu %7d %8s %14s %4d %1d %3lu %3lu %s\n",
		s->name, s->objects, s->object_size, size_str, dist_str,
		s->objs_per_slab, s->order,
		s->slabs ? (s->partial * 100) / s->slabs : 100UL,
		total ? (s->objects * s->object_size * 100) / total : 100UL,
		flags);
}

/*
 * Print one row of the per-node report for S, preceded by the header on
 * the first row.  Terminates the program when no node above 0 was seen.
 */
void slab_numa(struct slabinfo *s)
{
	int node;

	if (!highest_node)
		fatal("No NUMA information available.\n");

	if (skip_zero && !s->slabs)
		return;

	if (!line) {
		/* "Slab" padded so that header and rule match the "%-21s " name column. */
		printf("\n%-17sNode ", "Slab");
		for (node = 0; node <= highest_node; node++)
			printf(" %4d", node);
		printf("\n----------------------");
		for (node = 0; node <= highest_node; node++)
			printf("-----");
		printf("\n");
	}
	printf("%-21s ", s->name);
	for (node = 0; node <= highest_node; node++) {
		char b[20];

		store_size(b, s->numa[node]);
		printf(" %4s", b);
	}
	printf("\n");
	line++;
}

/*
 * Print the first line of the alloc_calls and free_calls attributes of S.
 * The attributes are read from the slab's own directory and printed as
 * data, never as a format string.
 */
void show_tracking(struct slabinfo *s)
{
	printf("\n%s: Calls to allocate a slab object\n", s->name);
	printf("---------------------------------------------------\n");
	if (read_slab_obj(s, "alloc_calls"))
		printf("%s\n", buffer);

	printf("%s: Calls to free a slab object\n", s->name);
	printf("-----------------------------------------------\n");
	if (read_slab_obj(s, "free_calls"))
		printf("%s\n", buffer);
}

/* Minimum, maximum and running total of one statistic in totals(). */
struct stat_range {
	unsigned long long min;
	unsigned long long max;
	unsigned long long total;
};

/* Fold VALUE into the min/max of R and add AMOUNT to its total. */
static void range_add(struct stat_range *r, unsigned long long value,
			unsigned long long amount)
{
	if (value < r->min)
		r->min = value;
	if (value > r->max)
		r->max = value;
	r->total += amount;
}

/*
 * Print one row of a totals table.  Every number goes through
 * store_size() and is followed by SUFFIX ("" or "%").  The total column
 * is omitted when TOTAL is NULL.
 */
static void print_row(const char *label, const char *suffix,
			unsigned long long avg, unsigned long long min,
			unsigned long long max, const unsigned long long *total)
{
	char b[20];

	store_size(b, avg);
	printf("%-10s %10s%s", label, b, suffix);
	store_size(b, min);
	printf(" %10s%s", b, suffix);
	store_size(b, max);
	printf(" %10s%s", b, suffix);
	if (total) {
		store_size(b, *total);
		printf(" %10s%s", b, suffix);
	}
	printf("\n");
}

/*
 * Print summary statistics over all caches that have at least one slab
 * and one object.
 */
void totals(void)
{
	struct slabinfo *s;
	int used_slabs = 0;
	char b1[20], b2[20], b3[20];
	const unsigned long long huge = 1ULL << 63;
	unsigned long long pct;

	struct stat_range objsize = { huge, 0, 0 };	/* object size */
	struct stat_range partial = { huge, 0, 0 };	/* partial slabs per cache */
	struct stat_range nslabs = { huge, 0, 0 };	/* slabs per cache */
	struct stat_range bytes = { huge, 0, 0 };	/* size of all slabs of a cache */
	struct stat_range used = { huge, 0, 0 };	/* bytes used for objects */
	struct stat_range waste = { huge, 0, 0 };	/* bytes not used for objects */
	struct stat_range objects = { huge, 0, 0 };	/* objects per cache */
	struct stat_range objwaste = { huge, 0, 0 };	/* waste per object */
	struct stat_range memobj = { huge, 0, 0 };	/* memory per object */
	struct stat_range partobj = { huge, 0, 0 };	/* objects in partial slabs */
	struct stat_range ppart = { 100, 0, 0 };	/* % partial slabs */
	struct stat_range ppartobj = { 100, 0, 0 };	/* % objects in partial slabs */

	for (s = slabinfo; s < slabinfo + slabs; s++) {
		unsigned long long slab_bytes;
		unsigned long long used_bytes;
		unsigned long long wasted;
		unsigned long long per_obj_waste;
		long long in_partial;
		unsigned long long pct_partial;
		unsigned long long pct_partobj;

		if (!s->slabs || !s->objects)
			continue;

		used_slabs++;

		slab_bytes = slab_size(s);
		used_bytes = s->objects * s->object_size;
		wasted = slab_bytes - used_bytes;
		per_obj_waste = s->slab_size - s->object_size;

		/* Objects left over once every full slab is accounted for. */
		in_partial = (long long)s->objects -
			((long long)s->slabs - (long long)s->partial -
				s->cpu_slabs) * s->objs_per_slab;
		if (in_partial < 0)
			in_partial = 0;

		pct_partial = s->partial * 100 / s->slabs;
		if (pct_partial > 100)
			pct_partial = 100;

		pct_partobj = (unsigned long long)in_partial * 100 / s->objects;
		if (pct_partobj > 100)
			pct_partobj = 100;

		range_add(&objsize, s->object_size, 0);
		range_add(&partial, s->partial, s->partial);
		range_add(&nslabs, s->slabs, s->slabs);
		range_add(&bytes, slab_bytes, slab_bytes);
		range_add(&waste, wasted, wasted);
		range_add(&objects, s->objects, s->objects);
		range_add(&used, used_bytes, used_bytes);
		range_add(&partobj, in_partial, in_partial);
		range_add(&ppart, pct_partial, pct_partial);
		range_add(&ppartobj, pct_partobj, pct_partobj);
		/* Per-object figures are weighted by the number of objects. */
		range_add(&objwaste, per_obj_waste, s->objects * per_obj_waste);
		range_add(&memobj, s->slab_size,
			(unsigned long long)s->objects * s->slab_size);
	}

	if (!objects.total) {
		printf("No objects\n");
		return;
	}
	if (!used_slabs) {
		printf("No slabs\n");
		return;
	}

	printf("Slabcache Totals\n");
	printf("----------------\n");
	printf("Slabcaches : %3d      Aliases  : %3d->%-3d Active: %3d\n",
			slabs, aliases, alias_targets, used_slabs);

	store_size(b1, bytes.total);
	store_size(b2, waste.total);
	store_size(b3, used.total ? waste.total * 100 / used.total : 0);
	printf("Memory used: %6s   # Loss   : %6s   MRatio: %6s%%\n", b1, b2, b3);

	store_size(b1, objects.total);
	store_size(b2, partobj.total);
	store_size(b3, partobj.total * 100 / objects.total);
	printf("# Objects  : %6s   # PartObj: %6s   ORatio: %6s%%\n", b1, b2, b3);

	printf("\n");
	printf("%-10s %10s %10s %10s %10s\n",
		"Per Cache", "Average", "Min", "Max", "Total");
	printf("---------------------------------------------------------\n");

	print_row("#Objects", "", objects.total / used_slabs,
		objects.min, objects.max, &objects.total);
	print_row("#Slabs", "", nslabs.total / used_slabs,
		nslabs.min, nslabs.max, &nslabs.total);
	print_row("#PartSlab", "", partial.total / used_slabs,
		partial.min, partial.max, &partial.total);

	pct = partial.total * 100 / nslabs.total;
	print_row("%PartSlab", "%", ppart.total / used_slabs,
		ppart.min, ppart.max, &pct);

	/* Average is a per-cache object count, like the other columns of this row. */
	print_row("PartObjs", "", partobj.total / used_slabs,
		partobj.min, partobj.max, &partobj.total);

	pct = partobj.total * 100 / objects.total;
	print_row("% PartObj", "%", ppartobj.total / used_slabs,
		ppartobj.min, ppartobj.max, &pct);

	print_row("Memory", "", bytes.total / used_slabs,
		bytes.min, bytes.max, &bytes.total);
	print_row("Used", "", used.total / used_slabs,
		used.min, used.max, &used.total);
	print_row("Loss", "", waste.total / used_slabs,
		waste.min, waste.max, &waste.total);

	printf("\n");
	printf("%-10s %10s %10s %10s\n", "Per Object", "Average", "Min", "Max");
	printf("---------------------------------------------\n");

	print_row("Memory", "", memobj.total / objects.total,
		memobj.min, memobj.max, NULL);
	print_row("User", "", used.total / objects.total,
		objsize.min, objsize.max, NULL);
	print_row("Loss", "", objwaste.total / objects.total,
		objwaste.min, objwaste.max, NULL);
}

/*
 * qsort() comparator for slabinfo[]: by total size, largest first, when
 * sort_size is set, otherwise by name ignoring case.  show_inverted
 * reverses either order.
 */
static int compare_slabs(const void *p1, const void *p2)
{
	const struct slabinfo *s1 = p1;
	const struct slabinfo *s2 = p2;
	int result;

	if (sort_size) {
		unsigned long size1 = slab_size(s1);
		unsigned long size2 = slab_size(s2);

		/* Three-way result so that negating it really reverses the order. */
		result = (size1 < size2) - (size1 > size2);
	} else {
		result = strcasecmp(s1->name, s2->name);
	}

	return show_inverted ? -result : result;
}

/*
 * Sort slabinfo[] in place.  Entries are moved, so pointers into the
 * array taken earlier (aliasinfo[].slab) no longer identify the same slab.
 */
void sort_slabs(void)
{
	qsort(slabinfo, slabs, sizeof(slabinfo[0]), compare_slabs);
}

/*
 * qsort() comparator for aliasinfo[]: by target name when listing aliases
 * in the normal direction, otherwise by alias name; case is ignored.
 */
static int compare_aliases(const void *p1, const void *p2)
{
	const struct aliasinfo *a1 = p1;
	const struct aliasinfo *a2 = p2;

	if (show_alias && !show_inverted)
		return strcasecmp(a1->ref, a2->ref);
	return strcasecmp(a1->name, a2->name);
}

/* Sort aliasinfo[] in place. */
void sort_aliases(void)
{
	qsort(aliasinfo, aliases, sizeof(aliasinfo[0]), compare_aliases);
}

/*
 * Resolve every alias to the slab whose name equals the alias target and
 * count the references per slab.  An alias without a matching slab is
 * fatal.
 */
void link_slabs(void)
{
	struct aliasinfo *a;
	struct slabinfo *s;

	for (a = aliasinfo; a < aliasinfo + aliases; a++) {

		for (s = slabinfo; s < slabinfo + slabs; s++)
			if (strcmp(a->ref, s->name) == 0) {
				a->slab = s;
				s->refs++;
				break;
			}
		if (s == slabinfo + slabs)
			fatal("Unresolved alias %s\n", a->ref);
	}
}

/*
 * Print the alias report: one line per slab followed by its aliases, or
 * one "alias -> slab" line per alias when show_inverted is set.  Slabs
 * with a single reference are omitted unless show_single_ref is set.
 */
void alias(void)
{
	struct aliasinfo *a;
	char *active = NULL;

	sort_aliases();
	link_slabs();

	for (a = aliasinfo; a < aliasinfo + aliases; a++) {

		if (!show_single_ref && a->slab->refs == 1)
			continue;

		if (show_inverted) {
			printf("%-20s -> %s\n", a->name, a->slab->name);
			continue;
		}

		/* Aliases are sorted by target, so equal targets are adjacent. */
		if (active && strcmp(a->slab->name, active) == 0) {
			printf(" %s", a->name);
			continue;
		}
		printf("\n%-20s <- %s", a->slab->name, a->name);
		active = a->slab->name;
	}
	if (active)
		printf("\n");
}

/*
 * Give slabs whose name starts with ':' the name of one of their aliases.
 * Slabs with more than one reference keep their name unless
 * show_first_alias is set.
 */
void rename_slabs(void)
{
	struct slabinfo *s;
	struct aliasinfo *a;

	for (s = slabinfo; s < slabinfo + slabs; s++) {
		if (*s->name != ':')
			continue;

		if (s->refs > 1 && !show_first_alias)
			continue;

		a = find_one_alias(s);

		s->name = a->name;
	}
}

/* Non-zero when SLAB does not match the pattern given on the command line. */
int slab_mismatch(char *slab)
{
	return regexec(&pattern, slab, 0, NULL, 0);
}

/*
 * Scan the current directory: symlinks become aliasinfo[] entries,
 * directories become slabinfo[] entries filled from their attributes.
 * Names starting with '.' or not matching the pattern are skipped.
 */
void read_slab_dir(void)
{
	DIR *dir;
	struct dirent *de;
	struct slabinfo *slab = slabinfo;
	struct aliasinfo *a = aliasinfo;
	char *p;
	char *t;
	ssize_t count;

	dir = opendir(".");
	if (!dir)
		fatal("Cannot read the slab directory\n");

	while ((de = readdir(dir))) {
		if (de->d_name[0] == '.' ||
				slab_mismatch(de->d_name))
			continue;
		switch (de->d_type) {
		case DT_LNK:
			/* Check before writing: the table has no spare entry. */
			if (a >= aliasinfo + MAX_ALIASES)
				fatal("Too many aliases\n");
			a->name = xstrdup(de->d_name);
			/* Leave room for the terminator readlink() does not write. */
			count = readlink(de->d_name, buffer, sizeof(buffer) - 1);

			if (count < 0)
				fatal("Cannot read symlink %s\n", de->d_name);

			buffer[count] = 0;
			/* Keep only the last path component of the target. */
			p = buffer + count;
			while (p > buffer && p[-1] != '/')
				p--;
			a->ref = xstrdup(p);
			a++;
			break;
		case DT_DIR:
			if (slab >= slabinfo + MAX_SLABS)
				fatal("Too many slabs\n");
			if (chdir(de->d_name))
				fatal("Unable to access slab %s\n", de->d_name);
			slab->name = xstrdup(de->d_name);
			slab->alias = 0;
			slab->refs = 0;
			slab->aliases = get_obj("aliases");
			slab->align = get_obj("align");
			slab->cache_dma = get_obj("cache_dma");
			slab->cpu_slabs = get_obj("cpu_slabs");
			slab->destroy_by_rcu = get_obj("destroy_by_rcu");
			slab->hwcache_align = get_obj("hwcache_align");
			slab->object_size = get_obj("object_size");
			slab->objects = get_obj("objects");
			slab->objs_per_slab = get_obj("objs_per_slab");
			slab->order = get_obj("order");
			slab->partial = get_obj_and_str("partial", &t);
			decode_numa_list(slab->numa_partial, t);
			free(t);
			slab->poison = get_obj("poison");
			slab->reclaim_account = get_obj("reclaim_account");
			slab->red_zone = get_obj("red_zone");
			slab->sanity_checks = get_obj("sanity_checks");
			slab->slab_size = get_obj("slab_size");
			slab->slabs = get_obj_and_str("slabs", &t);
			decode_numa_list(slab->numa, t);
			free(t);
			slab->store_user = get_obj("store_user");
			slab->trace = get_obj("trace");
			if (chdir(".."))
				fatal("Unable to leave slab %s\n", de->d_name);
			if (slab->name[0] == ':')
				alias_targets++;
			slab++;
			break;
		default:
			fatal("Unknown file type %x\n", (unsigned int)de->d_type);
		}
	}
	closedir(dir);
	slabs = slab - slabinfo;
	aliases = a - aliasinfo;
}

/*
 * Run the selected action on every slab.  Exactly one action is applied,
 * in this order of precedence: NUMA report, tracking, validate, shrink,
 * slab report.
 */
void output_slabs(void)
{
	struct slabinfo *slab;

	for (slab = slabinfo; slab < slabinfo + slabs; slab++) {

		if (slab->alias)
			continue;

		if (show_numa)
			slab_numa(slab);
		else if (show_track)
			show_tracking(slab);
		else if (validate)
			slab_validate(slab);
		else if (shrink)
			slab_shrink(slab);
		else if (show_slab)
			slabcache(slab);
	}
}

/* Long options; "track" is kept next to the documented "tracking". */
struct option opts[] = {
	{ "aliases", 0, NULL, 'a' },
	{ "slabs", 0, NULL, 'l' },
	{ "numa", 0, NULL, 'n' },
	{ "zero", 0, NULL, 'z' },
	{ "help", 0, NULL, 'h' },
	{ "validate", 0, NULL, 'v' },
	{ "first-alias", 0, NULL, 'f' },
	{ "shrink", 0, NULL, 's' },
	{ "track", 0, NULL, 't'},
	{ "tracking", 0, NULL, 't'},
	{ "inverted", 0, NULL, 'i'},
	{ "1ref", 0, NULL, '1'},
	{ "Totals", 0, NULL, 'T'},
	{ "Size", 0, NULL, 'S'},
	{ NULL, 0, NULL, 0 }
};

/*
 * Parse the options, read /sys/slab and print the requested report.
 * The optional non-option argument is a case-insensitive regular
 * expression selecting the slabs to look at.
 */
int main(int argc, char *argv[])
{
	int c;
	int err;
	const char *pattern_source;

	page_size = getpagesize();
	if (chdir("/sys/slab"))
		fatal("This kernel does not have SLUB support.\n");

	while ((c = getopt_long(argc, argv, "afhil1nstvzTS", opts, NULL)) != -1)
	switch (c) {
		case '1':
			show_single_ref = 1;
			break;
		case 'a':
			show_alias = 1;
			break;
		case 'f':
			show_first_alias = 1;
			break;
		case 'h':
			usage();
			return 0;
		case 'i':
			show_inverted = 1;
			break;
		case 'n':
			show_numa = 1;
			break;
		case 's':
			shrink = 1;
			break;
		case 'l':
			show_slab = 1;
			break;
		case 't':
			show_track = 1;
			break;
		case 'v':
			validate = 1;
			break;
		case 'z':
			skip_zero = 0;
			break;
		case 'T':
			show_totals = 1;
			break;
		case 'S':
			sort_size = 1;
			break;

		default:
			fatal("%s: Invalid option '%c'\n", argv[0], optopt);

	}

	if (!show_slab && !show_alias && !show_track
		&& !validate && !shrink)
		show_slab = 1;

	if (argc > optind)
		pattern_source = argv[optind];
	else
		pattern_source = ".*";

	err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
	if (err)
		fatal("%s: Invalid pattern '%s' code %d\n",
			argv[0], pattern_source, err);
	read_slab_dir();
	if (show_alias)
		alias();
	else if (show_totals)
		totals();
	else {
		link_slabs();
		rename_slabs();
		sort_slabs();
		output_slabs();
	}
	return 0;
}

/*
 * Patch trailer that follows the file body in the original document, kept
 * verbatim (the commit hash it introduces is on the next line):
 *
 * -- cgit v1.2.3 From
 */
2b744c01a54fe0c9974ff1b29522f25f07084053 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Sun, 6 May 2007 14:49:59 -0700 Subject: mm: fix handling of panic_on_oom when cpusets are in use The current panic_on_oom may not work if there is a process using cpusets/mempolicy, because other nodes' memory may remain. But some people want failover by panic ASAP even if they are used. This patch makes new setting for its request. This is tested on my ia64 box which has 3 nodes. Signed-off-by: Yasunori Goto Signed-off-by: Benjamin LaHaise Cc: Christoph Lameter Cc: Paul Jackson Cc: Ethan Solomita Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index e96a341eb7e..1d192565e18 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -197,11 +197,22 @@ and may not be fast. panic_on_oom -This enables or disables panic on out-of-memory feature. If this is set to 1, -the kernel panics when out-of-memory happens. If this is set to 0, the kernel -will kill some rogue process, called oom_killer. Usually, oom_killer can kill -rogue processes and system will survive. If you want to panic the system -rather than killing rogue processes, set this to 1. +This enables or disables panic on out-of-memory feature. -The default value is 0. +If this is set to 0, the kernel will kill some rogue process, +called oom_killer. Usually, oom_killer can kill rogue processes and +system will survive. + +If this is set to 1, the kernel panics when out-of-memory happens. +However, if a process limits using nodes by mempolicy/cpusets, +and those nodes become memory exhaustion status, one process +may be killed by oom-killer. No panic occurs in this case. +Because other nodes' memory may be free. This means system total status +may be not fatal yet. 
+If this is set to 2, the kernel panics compulsorily even in the +above-mentioned case. + +The default value is 0. +1 and 2 are for failover of clustering. Please select either +according to your policy of failover. -- cgit v1.2.3 From 1394f03221790a988afc3e4b3cb79f2e477246a9 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Sun, 6 May 2007 14:50:22 -0700 Subject: blackfin architecture This adds support for the Analog Devices Blackfin processor architecture, and currently supports the BF533, BF532, BF531, BF537, BF536, BF534, and BF561 (Dual Core) devices, with a variety of development platforms including those available from Analog Devices (BF533-EZKit, BF533-STAMP, BF537-STAMP, BF561-EZKIT), and Bluetechnix! Tinyboards. The Blackfin architecture was jointly developed by Intel and Analog Devices Inc. (ADI) as the Micro Signal Architecture (MSA) core and introduced in December of 2000. Since then ADI has put this core into its Blackfin processor family of devices. The Blackfin core has the advantages of a clean, orthogonal, RISC-like microprocessor instruction set. It combines a dual-MAC (Multiply/Accumulate), state-of-the-art signal processing engine and single-instruction, multiple-data (SIMD) multimedia capabilities into a single instruction-set architecture.
The Blackfin architecture, including the instruction set, is described by the ADSP-BF53x/BF56x Blackfin Processor Programming Reference http://blackfin.uclinux.org/gf/download/frsrelease/29/2549/Blackfin_PRM.pdf The Blackfin processor is already supported by major releases of gcc, and there are binary and source rpms/tarballs for many architectures at: http://blackfin.uclinux.org/gf/project/toolchain/frs There is complete documentation, including "getting started" guides available at: http://docs.blackfin.uclinux.org/ which provides links to the sources and patches you will need in order to set up a cross-compiling environment for bfin-linux-uclibc This patch, as well as the other patches (toolchain, distribution, uClibc) are actively supported by Analog Devices Inc, at: http://blackfin.uclinux.org/ We have tested this on LTP, and our test plan (including pass/fails) can be found at: http://docs.blackfin.uclinux.org/doku.php?id=testing_the_linux_kernel [m.kozlowski@tuxland.pl: balance parenthesis in blackfin header files] Signed-off-by: Bryan Wu Signed-off-by: Mariusz Kozlowski Signed-off-by: Aubrey Li Signed-off-by: Jie Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/blackfin/00-INDEX | 11 ++ Documentation/blackfin/Filesystems | 169 +++++++++++++++++++++++++++++++ Documentation/blackfin/cache-lock.txt | 48 +++++++++ Documentation/blackfin/cachefeatures.txt | 65 ++++++++++++ 4 files changed, 293 insertions(+) create mode 100644 Documentation/blackfin/00-INDEX create mode 100644 Documentation/blackfin/Filesystems create mode 100644 Documentation/blackfin/cache-lock.txt create mode 100644 Documentation/blackfin/cachefeatures.txt (limited to 'Documentation') diff --git a/Documentation/blackfin/00-INDEX b/Documentation/blackfin/00-INDEX new file mode 100644 index 00000000000..7cb3b356b24 --- /dev/null +++ b/Documentation/blackfin/00-INDEX @@ -0,0 +1,11 @@ +00-INDEX + - This file + +cache-lock.txt + - HOWTO for blackfin cache locking. 
+ +cachefeatures.txt + - Supported cache features. + +Filesystems + - Requirements for mounting the root file system. diff --git a/Documentation/blackfin/Filesystems b/Documentation/blackfin/Filesystems new file mode 100644 index 00000000000..51260a1b803 --- /dev/null +++ b/Documentation/blackfin/Filesystems @@ -0,0 +1,169 @@ +/* + * File: Documentation/blackfin/Filesystems + * Based on: + * Author: + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Rev: $Id: Filesystems 2384 2006-11-01 04:12:43Z magicyang $ + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + */ + + How to mount the root file system in uClinux/Blackfin + ----------------------------------------------------- + +1 Mounting EXT3 File system. + ------------------------ + + Creating an EXT3 File system for uClinux/Blackfin: + + +Please follow the steps to form the EXT3 File system and mount the same as root +file system. + +a Make an ext3 file system as large as you want the final root file + system. + + mkfs.ext3 /dev/ram0 + +b Mount this Empty file system on a free directory as: + + mount -t ext3 /dev/ram0 ./test + where ./test is the empty directory. + +c Copy your root fs directory that you have so carefully made over. + + cp -af /tmp/my_final_rootfs_files/* ./test + + (For ex: cp -af uClinux-dist/romfs/* ./test) + +d If you have done everything right till now you should be able to see + the required "root" dir's (that's etc, root, bin, lib, sbin...) + +e Now unmount the file system + + umount ./test + +f Create the root file system image. + + dd if=/dev/ram0 bs=1k count= \ + > ext3fs.img + + +Now you have to tell the kernel that will be mounting this file system as +rootfs. +So do a make menuconfig under kernel and select the Ext3 journaling file system +support under File system --> submenu. + + +2. Mounting EXT2 File system. 
+ ------------------------- + +By default the ext2 file system image will be created if you invoke make from +the top uClinux-dist directory. + + +3. Mounting CRAMFS File System + ---------------------------- + +To create a CRAMFS file system image execute the command + + mkfs.cramfs ./test cramfs.img + + where ./test is the target directory. + + +4. Mounting ROMFS File System + -------------------------- + +To create a ROMFS file system image execute the command + + genromfs -v -V "ROMdisk" -f romfs.img -d ./test + + where ./test is the target directory + + +5. Mounting the JFFS2 Filesystem + ----------------------------- + +To create a compressed JFFS filesystem (JFFS2), please execute the command + + mkfs.jffs2 -d ./test -o jffs2.img + + where ./test is the target directory. + +However, please make sure the following is in your kernel config. + +/* + * RAM/ROM/Flash chip drivers + */ +#define CONFIG_MTD_CFI 1 +#define CONFIG_MTD_ROM 1 +/* + * Mapping drivers for chip access + */ +#define CONFIG_MTD_COMPLEX_MAPPINGS 1 +#define CONFIG_MTD_BF533 1 +#undef CONFIG_MTD_UCLINUX + +Through the u-boot boot loader, use the jffs2.img in the corresponding +partition made in linux-2.6.x/drivers/mtd/maps/bf533_flash.c. + +NOTE - Currently the Flash driver is available only for EZKIT. Watch out for a + STAMP driver soon. + + +6. Mounting the NFS File system + ----------------------------- + + For mounting the NFS please do the following in the kernel config. + + In Networking Support --> Networking options --> TCP/IP networking --> + IP: kernel level autoconfiguration + + Enable BOOTP Support. 
+ + In Kernel hacking --> Compiled-in kernel boot parameter add the following + + root=/dev/nfs rw ip=bootp + + In File system --> Network File system, Enable + + NFS file system support --> NFSv3 client support + Root File system on NFS + + in uClibc menuconfig, do the following + In Networking Support + enable Remote Procedure Call (RPC) support + Full RPC Support + + On the Host side, ensure that /etc/dhcpd.conf looks something like this + + ddns-update-style ad-hoc; + allow bootp; + subnet 10.100.4.0 netmask 255.255.255.0 { + default-lease-time 122209600; + max-lease-time 31557600; + group { + host bf533 { + hardware ethernet 00:CF:52:49:C3:01; + fixed-address 10.100.4.50; + option root-path "/home/nfsmount"; + } + } + + ensure that /etc/exports looks something like this + /home/nfsmount *(rw,no_root_squash,no_all_squash) + + run the following commands as root (may differ depending on your + distribution) : + - service nfs start + - service portmap start + - service dhcpd start + - /usr/sbin/exportfs diff --git a/Documentation/blackfin/cache-lock.txt b/Documentation/blackfin/cache-lock.txt new file mode 100644 index 00000000000..88ba1e6c31c --- /dev/null +++ b/Documentation/blackfin/cache-lock.txt @@ -0,0 +1,48 @@ +/* + * File: Documentation/blackfin/cache-lock.txt + * Based on: + * Author: + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Rev: $Id: cache-lock.txt 2384 2006-11-01 04:12:43Z magicyang $ + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + */ + +How to lock your code in cache in uClinux/blackfin +-------------------------------------------------- + +There are only a few steps required to lock your code into the cache. +Currently you can lock the code by Way. + +Below are the interface provided for locking the cache. + + +1. 
cache_grab_lock(int Ways); + +This function grab the lock for locking your code into the cache specified +by Ways. + + +2. cache_lock(int Ways); + +This function should be called after your critical code has been executed. +Once the critical code exits, the code is now loaded into the cache. This +function locks the code into the cache. + + +So, the example sequence will be: + + cache_grab_lock(WAY0_L); /* Grab the lock */ + + critical_code(); /* Execute the code of interest */ + + cache_lock(WAY0_L); /* Lock the cache */ + +Where WAY0_L signifies WAY0 locking. diff --git a/Documentation/blackfin/cachefeatures.txt b/Documentation/blackfin/cachefeatures.txt new file mode 100644 index 00000000000..0fbec23becb --- /dev/null +++ b/Documentation/blackfin/cachefeatures.txt @@ -0,0 +1,65 @@ +/* + * File: Documentation/blackfin/cachefeatures.txt + * Based on: + * Author: + * + * Created: + * Description: This file contains the simple DMA Implementation for Blackfin + * + * Rev: $Id: cachefeatures.txt 2384 2006-11-01 04:12:43Z magicyang $ + * + * Modified: + * Copyright 2004-2006 Analog Devices Inc. + * + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + */ + + - Instruction and Data cache initialization. + icache_init(); + dcache_init(); + + - Instruction and Data cache Invalidation Routines, when flushing the + same is not required. + _icache_invalidate(); + _dcache_invalidate(); + + Also, for invalidating the entire instruction and data cache, the below + routines are provided (another method for invalidation, refer page no 267 and 287 of + ADSP-BF533 Hardware Reference manual) + + invalidate_entire_dcache(); + invalidate_entire_icache(); + + -External Flushing of Instruction and data cache routines. + + flush_instruction_cache(); + flush_data_cache(); + + - Internal Flushing of Instruction and Data Cache. + + icplb_flush(); + dcplb_flush(); + + - Locking the cache. 
+ + cache_grab_lock(); + cache_lock(); + + Please refer to linux-2.6.x/Documentation/blackfin/cache-lock.txt for how to + lock the cache. + + Locking the cache is an optional feature. + + - Miscellaneous cache functions. + + flush_cache_all(); + flush_cache_mm(); + invalidate_dcache_range(); + flush_dcache_range(); + flush_dcache_page(); + flush_cache_range(); + flush_cache_page(); + invalidate_dcache_range(); + flush_page_to_ram(); + -- cgit v1.2.3 From f0ced9b229cfbc76b5db9837b4b256b602d56610 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 6 May 2007 14:50:50 -0700 Subject: power management: change /sys/power/disk display Change /sys/power/disk to display all valid modes as well as the currently selected one in a fashion known from the LED subsystem. This changes userspace API, but it is apparently not used much (we asked some userspace developers) Signed-off-by: Johannes Berg Acked-by: "Rafael J. Wysocki" Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/interface.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index 8c5b41bf3f3..fd5192a8fa8 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -34,8 +34,12 @@ for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then, we are able to look in the log messages and work out, for example, which code is being slow and which device drivers are misbehaving. -Reading from this file will display what the mode is currently set -to. Writing to this file will accept one of +Reading from this file will display all supported modes and the currently +selected one in brackets, for example + + [shutdown] reboot test testproc + +Writing to this file will accept one of 'platform' (only if the platform supports it) 'shutdown' -- cgit v1.2.3