aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt43
-rw-r--r--Documentation/sched-design-CFS.txt119
-rw-r--r--arch/i386/kernel/smpboot.c12
-rw-r--r--arch/i386/kernel/tsc.c9
-rw-r--r--arch/ia64/kernel/setup.c6
-rw-r--r--arch/mips/kernel/smp.c11
-rw-r--r--arch/sparc/kernel/smp.c10
-rw-r--r--arch/sparc64/kernel/smp.c27
-rw-r--r--drivers/ide/arm/icside.c16
-rw-r--r--drivers/ide/cris/ide-cris.c2
-rw-r--r--drivers/ide/ide-cd.c6
-rw-r--r--drivers/ide/ide-cd.h2
-rw-r--r--drivers/ide/ide-disk.c8
-rw-r--r--drivers/ide/ide-dma.c110
-rw-r--r--drivers/ide/ide-io.c4
-rw-r--r--drivers/ide/ide-iops.c8
-rw-r--r--drivers/ide/ide-probe.c10
-rw-r--r--drivers/ide/ide-proc.c34
-rw-r--r--drivers/ide/ide-timing.h56
-rw-r--r--drivers/ide/ide.c33
-rw-r--r--drivers/ide/legacy/hd.c2
-rw-r--r--drivers/ide/legacy/macide.c14
-rw-r--r--drivers/ide/mips/au1xxx-ide.c24
-rw-r--r--drivers/ide/pci/aec62xx.c119
-rw-r--r--drivers/ide/pci/alim15x3.c78
-rw-r--r--drivers/ide/pci/amd74xx.c127
-rw-r--r--drivers/ide/pci/atiixp.c5
-rw-r--r--drivers/ide/pci/cmd64x.c130
-rw-r--r--drivers/ide/pci/cs5535.c6
-rw-r--r--drivers/ide/pci/hpt366.c170
-rw-r--r--drivers/ide/pci/it8213.c8
-rw-r--r--drivers/ide/pci/it821x.c9
-rw-r--r--drivers/ide/pci/jmicron.c20
-rw-r--r--drivers/ide/pci/pdc202xx_new.c9
-rw-r--r--drivers/ide/pci/pdc202xx_old.c35
-rw-r--r--drivers/ide/pci/piix.c45
-rw-r--r--drivers/ide/pci/scc_pata.c2
-rw-r--r--drivers/ide/pci/serverworks.c103
-rw-r--r--drivers/ide/pci/sgiioc4.c20
-rw-r--r--drivers/ide/pci/siimage.c18
-rw-r--r--drivers/ide/pci/sis5513.c34
-rw-r--r--drivers/ide/pci/sl82c105.c20
-rw-r--r--drivers/ide/pci/slc90e66.c5
-rw-r--r--drivers/ide/pci/tc86c001.c4
-rw-r--r--drivers/ide/pci/via82cxxx.c175
-rw-r--r--drivers/ide/ppc/pmac.c42
-rw-r--r--fs/jfs/endian24.h2
-rw-r--r--fs/jfs/jfs_debug.c28
-rw-r--r--fs/jfs/jfs_debug.h2
-rw-r--r--fs/jfs/jfs_dinode.h42
-rw-r--r--fs/jfs/jfs_dmap.c419
-rw-r--r--fs/jfs/jfs_dmap.h118
-rw-r--r--fs/jfs/jfs_dtree.c105
-rw-r--r--fs/jfs/jfs_dtree.h2
-rw-r--r--fs/jfs/jfs_extent.c102
-rw-r--r--fs/jfs/jfs_filsys.h13
-rw-r--r--fs/jfs/jfs_imap.c296
-rw-r--r--fs/jfs/jfs_imap.h98
-rw-r--r--fs/jfs/jfs_incore.h4
-rw-r--r--fs/jfs/jfs_logmgr.c90
-rw-r--r--fs/jfs/jfs_logmgr.h26
-rw-r--r--fs/jfs/jfs_metapage.c3
-rw-r--r--fs/jfs/jfs_mount.c6
-rw-r--r--fs/jfs/jfs_txnmgr.c302
-rw-r--r--fs/jfs/jfs_txnmgr.h2
-rw-r--r--fs/jfs/jfs_types.h20
-rw-r--r--fs/jfs/jfs_umount.c2
-rw-r--r--fs/jfs/jfs_xtree.c428
-rw-r--r--fs/jfs/jfs_xtree.h48
-rw-r--r--fs/jfs/namei.c26
-rw-r--r--fs/jfs/resize.c48
-rw-r--r--fs/jfs/xattr.c9
-rw-r--r--fs/proc/array.c59
-rw-r--r--fs/proc/base.c71
-rw-r--r--include/asm-generic/bitops/sched.h21
-rw-r--r--include/asm-mips/mach-au1x00/au1xxx_ide.h28
-rw-r--r--include/linux/hardirq.h13
-rw-r--r--include/linux/ide.h18
-rw-r--r--include/linux/sched.h251
-rw-r--r--include/linux/topology.h12
-rw-r--r--include/linux/wait.h16
-rw-r--r--init/main.c5
-rw-r--r--kernel/delayacct.c10
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/posix-cpu-timers.c34
-rw-r--r--kernel/sched.c3021
-rw-r--r--kernel/sched_debug.c275
-rw-r--r--kernel/sched_fair.c1131
-rw-r--r--kernel/sched_idletask.c71
-rw-r--r--kernel/sched_rt.c255
-rw-r--r--kernel/sched_stats.h235
-rw-r--r--kernel/softirq.c1
-rw-r--r--kernel/sysctl.c80
-rw-r--r--lib/Kconfig.debug9
95 files changed, 5518 insertions, 4098 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index af50f9bbe68..4d880b3d1f3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1014,49 +1014,6 @@ and is between 256 and 4096 characters. It is defined in the file
mga= [HW,DRM]
- migration_cost=
- [KNL,SMP] debug: override scheduler migration costs
- Format: <level-1-usecs>,<level-2-usecs>,...
- This debugging option can be used to override the
- default scheduler migration cost matrix. The numbers
- are indexed by 'CPU domain distance'.
- E.g. migration_cost=1000,2000,3000 on an SMT NUMA
- box will set up an intra-core migration cost of
- 1 msec, an inter-core migration cost of 2 msecs,
- and an inter-node migration cost of 3 msecs.
-
- WARNING: using the wrong values here can break
- scheduler performance, so it's only for scheduler
- development purposes, not production environments.
-
- migration_debug=
- [KNL,SMP] migration cost auto-detect verbosity
- Format=<0|1|2>
- If a system's migration matrix reported at bootup
- seems erroneous then this option can be used to
- increase verbosity of the detection process.
- We default to 0 (no extra messages), 1 will print
- some more information, and 2 will be really
- verbose (probably only useful if you also have a
- serial console attached to the system).
-
- migration_factor=
- [KNL,SMP] multiply/divide migration costs by a factor
- Format=<percent>
- This debug option can be used to proportionally
- increase or decrease the auto-detected migration
- costs for all entries of the migration matrix.
- E.g. migration_factor=150 will increase migration
- costs by 50%. (and thus the scheduler will be less
- eager migrating cache-hot tasks)
- migration_factor=80 will decrease migration costs
- by 20%. (thus the scheduler will be more eager to
- migrate tasks)
-
- WARNING: using the wrong values here can break
- scheduler performance, so it's only for scheduler
- development purposes, not production environments.
-
mousedev.tap_time=
[MOUSE] Maximum time between finger touching and
leaving touchpad surface for touch to be considered
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/sched-design-CFS.txt
new file mode 100644
index 00000000000..16feebb7bdc
--- /dev/null
+++ b/Documentation/sched-design-CFS.txt
@@ -0,0 +1,119 @@
+
+This is the CFS scheduler.
+
+80% of CFS's design can be summed up in a single sentence: CFS basically
+models an "ideal, precise multi-tasking CPU" on real hardware.
+
+"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100%
+physical power and which can run each task at precise equal speed, in
+parallel, each at 1/nr_running speed. For example: if there are 2 tasks
+running then it runs each at 50% physical power - totally in parallel.
+
+On real hardware, we can run only a single task at once, so while that
+one task runs, the other tasks that are waiting for the CPU are at a
+disadvantage - the current task gets an unfair amount of CPU time. In
+CFS this fairness imbalance is expressed and tracked via the per-task
+p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of
+time the task should now run on the CPU for it to become completely fair
+and balanced.
+
+( small detail: on 'ideal' hardware, the p->wait_runtime value would
+ always be zero - no task would ever get 'out of balance' from the
+ 'ideal' share of CPU time. )
+
+CFS's task picking logic is based on this p->wait_runtime value and it
+is thus very simple: it always tries to run the task with the largest
+p->wait_runtime value. In other words, CFS tries to run the task with
+the 'gravest need' for more CPU time. So CFS always tries to split up
+CPU time between runnable tasks as close to 'ideal multitasking
+hardware' as possible.
+
+Most of the rest of CFS's design just falls out of this really simple
+concept, with a few add-on embellishments like nice levels,
+multiprocessing and various algorithm variants to recognize sleepers.
+
+In practice it works like this: the system runs a task a bit, and when
+the task schedules (or a scheduler tick happens) the task's CPU usage is
+'accounted for': the (small) time it just spent using the physical CPU
+is deducted from p->wait_runtime. [minus the 'fair share' it would have
+gotten anyway]. Once p->wait_runtime gets low enough so that another
+task becomes the 'leftmost task' of the time-ordered rbtree it maintains
+(plus a small amount of 'granularity' distance relative to the leftmost
+task so that we do not over-schedule tasks and trash the cache) then the
+new leftmost task is picked and the current task is preempted.
+
+The rq->fair_clock value tracks the 'CPU time a runnable task would have
+fairly gotten, had it been runnable during that time'. So by using
+rq->fair_clock values we can accurately timestamp and measure the
+'expected CPU time' a task should have gotten. All runnable tasks are
+sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and
+CFS picks the 'leftmost' task and sticks to it. As the system progresses
+forwards, newly woken tasks are put into the tree more and more to the
+right - slowly but surely giving a chance for every task to become the
+'leftmost task' and thus get on the CPU within a deterministic amount of
+time.
+
+Some implementation details:
+
+ - the introduction of Scheduling Classes: an extensible hierarchy of
+ scheduler modules. These modules encapsulate scheduling policy
+ details and are handled by the scheduler core without the core
+ code assuming about them too much.
+
+ - sched_fair.c implements the 'CFS desktop scheduler': it is a
+ replacement for the vanilla scheduler's SCHED_OTHER interactivity
+ code.
+
+ I'd like to give credit to Con Kolivas for the general approach here:
+ he has proven via RSDL/SD that 'fair scheduling' is possible and that
+ it results in better desktop scheduling. Kudos Con!
+
+ The CFS patch uses a completely different approach and implementation
+ from RSDL/SD. My goal was to make CFS's interactivity quality exceed
+ that of RSDL/SD, which is a high standard to meet :-) Testing
+ feedback is welcome to decide this one way or another. [ and, in any
+ case, all of SD's logic could be added via a kernel/sched_sd.c module
+ as well, if Con is interested in such an approach. ]
+
+ CFS's design is quite radical: it does not use runqueues, it uses a
+ time-ordered rbtree to build a 'timeline' of future task execution,
+ and thus has no 'array switch' artifacts (by which both the vanilla
+ scheduler and RSDL/SD are affected).
+
+ CFS uses nanosecond granularity accounting and does not rely on any
+ jiffies or other HZ detail. Thus the CFS scheduler has no notion of
+ 'timeslices' and has no heuristics whatsoever. There is only one
+ central tunable:
+
+ /proc/sys/kernel/sched_granularity_ns
+
+ which can be used to tune the scheduler from 'desktop' (low
+ latencies) to 'server' (good batching) workloads. It defaults to a
+ setting suitable for desktop workloads. SCHED_BATCH is handled by the
+ CFS scheduler module too.
+
+ Due to its design, the CFS scheduler is not prone to any of the
+ 'attacks' that exist today against the heuristics of the stock
+ scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all
+ work fine and do not impact interactivity and produce the expected
+ behavior.
+
+ the CFS scheduler has a much stronger handling of nice levels and
+ SCHED_BATCH: both types of workloads should be isolated much more
+ agressively than under the vanilla scheduler.
+
+ ( another detail: due to nanosec accounting and timeline sorting,
+ sched_yield() support is very simple under CFS, and in fact under
+ CFS sched_yield() behaves much better than under any other
+ scheduler i have tested so far. )
+
+ - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler
+ way than the vanilla scheduler does. It uses 100 runqueues (for all
+ 100 RT priority levels, instead of 140 in the vanilla scheduler)
+ and it needs no expired array.
+
+ - reworked/sanitized SMP load-balancing: the runqueue-walking
+ assumptions are gone from the load-balancing code now, and
+ iterators of the scheduling modules are used. The balancing code got
+ quite a bit simpler as a result.
+
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 88baed1e7e8..0b2954534b8 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -941,17 +941,6 @@ exit:
}
#endif
-static void smp_tune_scheduling(void)
-{
- if (cpu_khz) {
- /* cache size in kB */
- long cachesize = boot_cpu_data.x86_cache_size;
-
- if (cachesize > 0)
- max_cache_size = cachesize * 1024;
- }
-}
-
/*
* Cycle through the processors sending APIC IPIs to boot each.
*/
@@ -980,7 +969,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
current_thread_info()->cpu = 0;
- smp_tune_scheduling();
set_cpu_sibling_map(0);
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index f64b81f3033..ea63a30ca3e 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -4,6 +4,7 @@
* See comments there for proper credits.
*/
+#include <linux/sched.h>
#include <linux/clocksource.h>
#include <linux/workqueue.h>
#include <linux/cpufreq.h>
@@ -106,8 +107,13 @@ unsigned long long sched_clock(void)
/*
* Fall back to jiffies if there's no TSC available:
+ * ( But note that we still use it if the TSC is marked
+ * unstable. We do this because unlike Time Of Day,
+ * the scheduler clock tolerates small errors and it's
+ * very important for it to be as fast as the platform
+ * can achive it. )
*/
- if (unlikely(!tsc_enabled))
+ if (unlikely(!tsc_enabled && !tsc_unstable))
/* No locking but a rare wrong value is not a big deal: */
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
@@ -277,6 +283,7 @@ static struct clocksource clocksource_tsc = {
void mark_tsc_unstable(char *reason)
{
+ sched_clock_unstable_event();
if (!tsc_unstable) {
tsc_unstable = 1;
tsc_enabled = 0;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index eaa6a24bc0b..188fb73c684 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -805,7 +805,6 @@ static void __cpuinit
get_max_cacheline_size (void)
{
unsigned long line_size, max = 1;
- unsigned int cache_size = 0;
u64 l, levels, unique_caches;
pal_cache_config_info_t cci;
s64 status;
@@ -835,8 +834,6 @@ get_max_cacheline_size (void)
line_size = 1 << cci.pcci_line_size;
if (line_size > max)
max = line_size;
- if (cache_size < cci.pcci_cache_size)
- cache_size = cci.pcci_cache_size;
if (!cci.pcci_unified) {
status = ia64_pal_cache_config_info(l,
/* cache_type (instruction)= */ 1,
@@ -853,9 +850,6 @@ get_max_cacheline_size (void)
ia64_i_cache_stride_shift = cci.pcci_stride;
}
out:
-#ifdef CONFIG_SMP
- max_cache_size = max(max_cache_size, cache_size);
-#endif
if (max > ia64_max_cacheline_size)
ia64_max_cacheline_size = max;
}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 67edfa7ed93..a1b017f2dbb 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -51,16 +51,6 @@ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */
EXPORT_SYMBOL(phys_cpu_present_map);
EXPORT_SYMBOL(cpu_online_map);
-/* This happens early in bootup, can't really do it better */
-static void smp_tune_scheduling (void)
-{
- struct cache_desc *cd = &current_cpu_data.scache;
- unsigned long cachesize = cd->linesz * cd->sets * cd->ways;
-
- if (cachesize > max_cache_size)
- max_cache_size = cachesize;
-}
-
extern void __init calibrate_delay(void);
extern ATTRIB_NORET void cpu_idle(void);
@@ -228,7 +218,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
{
init_new_context(current, &init_mm);
current_thread_info()->cpu = 0;
- smp_tune_scheduling();
plat_prepare_cpus(max_cpus);
#ifndef CONFIG_HOTPLUG_CPU
cpu_present_map = cpu_possible_map;
diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c
index 4d9ad59031b..4fea3ac7bff 100644
--- a/arch/sparc/kernel/smp.c
+++ b/arch/sparc/kernel/smp.c
@@ -68,16 +68,6 @@ void __cpuinit smp_store_cpu_info(int id)
cpu_data(id).prom_node = cpu_node;
cpu_data(id).mid = cpu_get_hwmid(cpu_node);
- /* this is required to tune the scheduler correctly */
- /* is it possible to have CPUs with different cache sizes? */
- if (id == boot_cpu_id) {
- int cache_line,cache_nlines;
- cache_line = 0x20;
- cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line);
- cache_nlines = 0x8000;
- cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines);
- max_cache_size = cache_line * cache_nlines;
- }
if (cpu_data(id).mid < 0)
panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
}
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 4dcd7d0b60f..40e40f968d6 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1163,32 +1163,6 @@ int setup_profiling_timer(unsigned int multiplier)
return -EINVAL;
}
-static void __init smp_tune_scheduling(void)
-{
- unsigned int smallest = ~0U;
- int i;
-
- for (i = 0; i < NR_CPUS; i++) {
- unsigned int val = cpu_data(i).ecache_size;
-
- if (val && val < smallest)
- smallest = val;
- }
-
- /* Any value less than 256K is nonsense. */
- if (smallest < (256U * 1024U))
- smallest = 256 * 1024;
-
- max_cache_size = smallest;
-
- if (smallest < 1U * 1024U * 1024U)
- printk(KERN_INFO "Using max_cache_size of %uKB\n",
- smallest / 1024U);
- else
- printk(KERN_INFO "Using max_cache_size of %uMB\n",
- smallest / 1024U / 1024U);
-}
-
/* Constrain the number of cpus to max_cpus. */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
@@ -1206,7 +1180,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
- smp_tune_scheduling();
}
void __devinit smp_prepare_boot_cpu(void)
diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c
index 66f826252ae..444a0b84f5b 100644
--- a/drivers/ide/arm/icside.c
+++ b/drivers/ide/arm/icside.c
@@ -448,23 +448,21 @@ static int icside_dma_test_irq(ide_drive_t *drive)
ICS_ARCIN_V6_INTRSTAT_1)) & 1;
}
-static int icside_dma_timeout(ide_drive_t *drive)
+static void icside_dma_timeout(ide_drive_t *drive)
{
printk(KERN_ERR "%s: DMA timeout occurred: ", drive->name);
if (icside_dma_test_irq(drive))
- return 0;
+ return;
- ide_dump_status(drive, "DMA timeout",
- HWIF(drive)->INB(IDE_STATUS_REG));
+ ide_dump_status(drive, "DMA timeout", HWIF(drive)->INB(IDE_STATUS_REG));
- return icside_dma_end(drive);
+ icside_dma_end(drive);
}
-static int icside_dma_lostirq(ide_drive_t *drive)
+static void icside_dma_lost_irq(ide_drive_t *drive)
{
printk(KERN_ERR "%s: IRQ lost\n", drive->name);
- return 1;
}
static void icside_dma_init(ide_hwif_t *hwif)
@@ -490,8 +488,8 @@ static void icside_dma_init(ide_hwif_t *hwif)
hwif->dma_start = icside_dma_start;
hwif->ide_dma_end = icside_dma_end;
hwif->ide_dma_test_irq = icside_dma_test_irq;
- hwif->ide_dma_timeout = icside_dma_timeout;
- hwif->ide_dma_lostirq = icside_dma_lostirq;
+ hwif->dma_timeout = icside_dma_timeout;
+ hwif->dma_lost_irq = icside_dma_lost_irq;
hwif->drives[0].autodma = hwif->autodma;
hwif->drives[1].autodma = hwif->autodma;
diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c
index ca0341c05e5..886091bc7db 100644
--- a/drivers/ide/cris/ide-cris.c
+++ b/drivers/ide/cris/ide-cris.c
@@ -819,7 +819,7 @@ init_e100_ide (void)
hwif->dma_host_off = &cris_dma_off;
hwif->dma_host_on = &cris_dma_on;
hwif->dma_off_quietly = &cris_dma_off;
- hwif->udma_four = 0;
+ hwif->cbl = ATA_CBL_PATA40;
hwif->ultra_mask = cris_ultra_mask;
hwif->mwdma_mask = 0x07; /* Multiword DMA 0-2 */
hwif->autodma = 1;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 252ab8295ed..1486eb212cc 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -481,7 +481,7 @@ void cdrom_analyze_sense_data(ide_drive_t *drive,
else
printk(" Unknown Error Type: ");
- if (sense->sense_key < ARY_LEN(sense_key_texts))
+ if (sense->sense_key < ARRAY_SIZE(sense_key_texts))
s = sense_key_texts[sense->sense_key];
printk("%s -- (Sense key=0x%02x)\n", s, sense->sense_key);
@@ -491,7 +491,7 @@ void cdrom_analyze_sense_data(ide_drive_t *drive,
sense->ascq);
s = buf;
} else {
- int lo = 0, mid, hi = ARY_LEN(sense_data_texts);
+ int lo = 0, mid, hi = ARRAY_SIZE(sense_data_texts);
unsigned long key = (sense->sense_key << 16);
key |= (sense->asc << 8);
if (!(sense->ascq >= 0x80 && sense->ascq <= 0xdd))
@@ -524,7 +524,7 @@ void cdrom_analyze_sense_data(ide_drive_t *drive,
if (failed_command != NULL) {
- int lo=0, mid, hi= ARY_LEN (packet_command_texts);
+ int lo=0, mid, hi= ARRAY_SIZE(packet_command_texts);
s = NULL;
while (hi > lo) {
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index ad1f2ed14a3..228b29c5d2e 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -498,8 +498,6 @@ struct cdrom_info {
* Descriptions of ATAPI error codes.
*/
-#define ARY_LEN(a) ((sizeof(a) / sizeof(a[0])))
-
/* This stuff should be in cdrom.h, since it is now generic... */
/* ATAPI sense keys (from table 140 of ATAPI 2.6) */
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index dc2175c81f5..b1304a7f3e0 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -1190,11 +1190,11 @@ static int idedisk_ioctl(struct inode *inode, struct file *file,
return generic_ide_ioctl(drive, file, bdev, cmd, arg);
read_val:
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
spin_lock_irqsave(&ide_lock, flags);
err = *val;
spin_unlock_irqrestore(&ide_lock, flags);
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
return err >= 0 ? put_user(err, (long __user *)arg) : err;
set_val:
@@ -1204,9 +1204,9 @@ set_val:
if (!capable(CAP_SYS_ADMIN))
err = -EACCES;
else {
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
err = setfunc(drive, arg);
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
}
}
return err;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index ead141e2db9..5fe1d72ab45 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -91,45 +91,45 @@
static const struct drive_list_entry drive_whitelist [] = {
- { "Micropolis 2112A" , "ALL" },
- { "CONNER CTMA 4000" , "ALL" },
- { "CONNER CTT8000-A" , "ALL" },
- { "ST34342A" , "ALL" },
+ { "Micropolis 2112A" , NULL },
+ { "CONNER CTMA 4000" , NULL },
+ { "CONNER CTT8000-A" , NULL },
+ { "ST34342A" , NULL },
{ NULL , NULL }
};
static const struct drive_list_entry drive_blacklist [] = {
- { "WDC AC11000H" , "ALL" },
- { "WDC AC22100H" , "ALL" },
- { "WDC AC32500H" , "ALL" },
- { "WDC AC33100H" , "ALL" },
- { "WDC AC31600H" , "ALL" },
+ { "WDC AC11000H" , NULL },
+ { "WDC AC22100H" , NULL },
+ { "WDC AC32500H" , NULL },
+ { "WDC AC33100H" , NULL },
+ { "WDC AC31600H" , NULL },
{ "WDC AC32100H" , "24.09P07" },
{ "WDC AC23200L" , "21.10N21" },
- { "Compaq CRD-8241B" , "ALL" },
- { "CRD-8400B" , "ALL" },
- { "CRD-8480B", "ALL" },
- { "CRD-8482B", "ALL" },
- { "CRD-84" , "ALL" },
- { "SanDisk SDP3B" , "ALL" },
- { "SanDisk SDP3B-64" , "ALL" },
- { "SANYO CD-ROM CRD" , "ALL" },
- { "HITACHI CDR-8" , "ALL" },
- { "HITACHI CDR-8335" , "ALL" },
- { "HITACHI CDR-8435" , "ALL" },
- { "Toshiba CD-ROM XM-6202B" , "ALL" },
- { "TOSHIBA CD-ROM XM-1702BC", "ALL" },
- { "CD-532E-A" , "ALL" },
- { "E-IDE CD-ROM CR-840", "ALL" },
- { "CD-ROM Drive/F5A", "ALL" },
- { "WPI CDD-820", "ALL" },
- { "SAMSUNG CD-ROM SC-148C", "ALL" },
- { "SAMSUNG CD-ROM SC", "ALL" },
- { "ATAPI CD-ROM DRIVE 40X MAXIMUM", "ALL" },
- { "_NEC DV5800A", "ALL" },
+ { "Compaq CRD-8241B" , NULL },
+ { "CRD-8400B" , NULL },
+ { "CRD-8480B", NULL },
+ { "CRD-8482B", NULL },
+ { "CRD-84" , NULL },
+ { "SanDisk SDP3B" , NULL },
+ { "SanDisk SDP3B-64" , NULL },
+ { "SANYO CD-ROM CRD" , NULL },
+ { "HITACHI CDR-8" , NULL },
+ { "HITACHI CDR-8335" , NULL },
+ { "HITACHI CDR-8435" , NULL },
+ { "Toshiba CD-ROM XM-6202B" , NULL },
+ { "TOSHIBA CD-ROM XM-1702BC", NULL },
+ { "CD-532E-A" , NULL },
+ { "E-IDE CD-ROM CR-840", NULL },
+ { "CD-ROM Drive/F5A", NULL },
+ { "WPI CDD-820", NULL },
+ { "SAMSUNG CD-ROM SC-148C", NULL },
+ { "SAMSUNG CD-ROM SC", NULL },
+ { "ATAPI CD-ROM DRIVE 40X MAXIMUM", NULL },
+ { "_NEC DV5800A", NULL },
{ "SAMSUNG CD-ROM SN-124", "N001" },
- { "Seagate STT20000A", "ALL" },
+ { "Seagate STT20000A", NULL },
{ NULL , NULL }
};
@@ -147,8 +147,8 @@ int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *driv
{
for ( ; drive_table->id_model ; drive_table++)
if ((!strcmp(drive_table->id_model, id->model)) &&
- ((strstr(id->fw_rev, drive_table->id_firmware)) ||
- (!strcmp(drive_table->id_firmware, "ALL"))))
+ (!drive_table->id_firmware ||
+ strstr(id->fw_rev, drive_table->id_firmware)))
return 1;
return 0;
}
@@ -702,8 +702,22 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base)
mask = id->dma_mword & hwif->mwdma_mask;
break;
case XFER_SW_DMA_0:
- if (id->field_valid & 2)
+ if (id->field_valid & 2) {
mask = id->dma_1word & hwif->swdma_mask;
+ } else if (id->tDMA) {
+ /*
+ * ide_fix_driveid() doesn't convert ->tDMA to the
+ * CPU endianness so we need to do it here
+ */
+ u8 mode = le16_to_cpu(id->tDMA);
+
+ /*
+ * if the mode is valid convert it to the mask
+ * (the maximum allowed mode is XFER_SW_DMA_2)
+ */
+ if (mode <= 2)
+ mask = ((2 << mode) - 1) & hwif->swdma_mask;
+ }
break;
default:
BUG();
@@ -847,27 +861,27 @@ int ide_set_dma(ide_drive_t *drive)
return rc;
}
-EXPORT_SYMBOL_GPL(ide_set_dma);
-
#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
-int __ide_dma_lostirq (ide_drive_t *drive)
+void ide_dma_lost_irq (ide_drive_t *drive)
{
printk("%s: DMA interrupt recovery\n", drive->name);
- return 1;
}
-EXPORT_SYMBOL(__ide_dma_lostirq);
+EXPORT_SYMBOL(ide_dma_lost_irq);
-int __ide_dma_timeout (ide_drive_t *drive)
+void ide_dma_timeout (ide_drive_t *drive)
{
+ ide_hwif_t *hwif = HWIF(drive);
+
printk(KERN_ERR "%s: timeout waiting for DMA\n", drive->name);
- if (HWIF(drive)->ide_dma_test_irq(drive))
- return 0;
- return HWIF(drive)->ide_dma_end(drive);
+ if (hwif->ide_dma_test_irq(drive))
+ return;
+
+ hwif->ide_dma_end(drive);
}
-EXPORT_SYMBOL(__ide_dma_timeout);
+EXPORT_SYMBOL(ide_dma_timeout);
/*
* Needed for allowing full modular support of ide-driver
@@ -1018,10 +1032,10 @@ void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_p
hwif->ide_dma_end = &__ide_dma_end;
if (!hwif->ide_dma_test_irq)
hwif->ide_dma_test_irq = &__ide_dma_test_irq;
- if (!hwif->ide_dma_timeout)
- hwif->ide_dma_timeout = &__ide_dma_timeout;
- if (!hwif->ide_dma_lostirq)
- hwif->ide_dma_lostirq = &__ide_dma_lostirq;
+ if (!hwif->dma_timeout)
+ hwif->dma_timeout = &ide_dma_timeout;
+ if (!hwif->dma_lost_irq)
+ hwif->dma_lost_irq = &ide_dma_lost_irq;
if (hwif->chipset != ide_trm290) {
u8 dma_stat = hwif->INB(hwif->dma_status);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index bfe8f1b712b..c5b5011da56 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1350,7 +1350,7 @@ static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
hwif->INB(IDE_STATUS_REG));
} else {
printk(KERN_WARNING "%s: DMA timeout retry\n", drive->name);
- (void) hwif->ide_dma_timeout(drive);
+ hwif->dma_timeout(drive);
}
/*
@@ -1466,7 +1466,7 @@ void ide_timer_expiry (unsigned long data)
startstop = handler(drive);
} else if (drive_is_ready(drive)) {
if (drive->waiting_for_dma)
- (void) hwgroup->hwif->ide_dma_lostirq(drive);
+ hwgroup->hwif->dma_lost_irq(drive);
(void)ide_ack_intr(hwif);
printk(KERN_WARNING "%s: lost interrupt\n", drive->name);
startstop = handler(drive);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index f0be5f665a0..92578b6832e 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -574,7 +574,10 @@ u8 eighty_ninty_three (ide_drive_t *drive)
ide_hwif_t *hwif = drive->hwif;
struct hd_driveid *id = drive->id;
- if (hwif->udma_four == 0)
+ if (hwif->cbl == ATA_CBL_PATA40_SHORT)
+ return 1;
+
+ if (hwif->cbl != ATA_CBL_PATA80)
goto no_80w;
/* Check for SATA but only if we are ATA5 or higher */
@@ -600,7 +603,8 @@ no_80w:
printk(KERN_WARNING "%s: %s side 80-wire cable detection failed, "
"limiting max speed to UDMA33\n",
- drive->name, hwif->udma_four ? "drive" : "host");
+ drive->name,
+ hwif->cbl == ATA_CBL_PATA80 ? "drive" : "host");
drive->udma33_warned = 1;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f5ce22c38f8..cc580139946 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -144,7 +144,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
local_irq_enable();
ide_fix_driveid(id);
-#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA)
+#if defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA)
/*
* EATA SCSI controllers do a hardware ATA emulation:
* Ignore them if there is a driver for them available.
@@ -154,7 +154,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd)
printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model);
goto err_misc;
}
-#endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO */
+#endif /* CONFIG_SCSI_EATA || CONFIG_SCSI_EATA_PIO */
/*
* WIN_IDENTIFY returns little-endian info,
@@ -1025,7 +1025,7 @@ static int init_irq (ide_hwif_t *hwif)
BUG_ON(irqs_disabled());
BUG_ON(hwif == NULL);
- down(&ide_cfg_sem);
+ mutex_lock(&ide_cfg_mtx);
hwif->hwgroup = NULL;
#if MAX_HWIFS > 1
/*
@@ -1154,7 +1154,7 @@ static int init_irq (ide_hwif_t *hwif)
printk(" (%sed with %s)",
hwif->sharing_irq ? "shar" : "serializ", match->name);
printk("\n");
- up(&ide_cfg_sem);
+ mutex_unlock(&ide_cfg_mtx);
return 0;
out_unlink:
spin_lock_irq(&ide_lock);
@@ -1177,7 +1177,7 @@ out_unlink:
}
spin_unlock_irq(&ide_lock);
out_up:
- up(&ide_cfg_sem);
+ mutex_unlock(&ide_cfg_mtx);
return 1;
}
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index ea94c9aa122..fc1d8ae6a80 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -156,7 +156,7 @@ static int __ide_add_setting(ide_drive_t *drive, const char *name, int rw, int d
{
ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL;
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
while ((*p) && strcmp((*p)->name, name) < 0)
p = &((*p)->next);
if ((setting = kzalloc(sizeof(*setting), GFP_KERNEL)) == NULL)
@@ -177,10 +177,10 @@ static int __ide_add_setting(ide_drive_t *drive, const char *name, int rw, int d
if (auto_remove)
setting->auto_remove = 1;
*p = setting;
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
return 0;
abort:
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
kfree(setting);
return -1;
}
@@ -224,7 +224,7 @@ static void __ide_remove_setting (ide_drive_t *drive, char *name)
*
* Automatically remove all the driver specific settings for this
* drive. This function may not be called from IRQ context. The
- * caller must hold ide_setting_sem.
+ * caller must hold ide_setting_mtx.
*/
static void auto_remove_settings (ide_drive_t *drive)
@@ -269,7 +269,7 @@ static ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name)
* @setting: drive setting
*
* Read a drive setting and return the value. The caller
- * must hold the ide_setting_sem when making this call.
+ * must hold the ide_setting_mtx when making this call.
*
* BUGS: the data return and error are the same return value
* so an error -EINVAL and true return of the same value cannot
@@ -306,7 +306,7 @@ static int ide_read_setting(ide_drive_t *drive, ide_settings_t *setting)
* @val: value
*
* Write a drive setting if it is possible. The caller
- * must hold the ide_setting_sem when making this call.
+ * must hold the ide_setting_mtx when making this call.
*
* BUGS: the data return and error are the same return value
* so an error -EINVAL and true return of the same value cannot
@@ -367,7 +367,7 @@ static int set_xfer_rate (ide_drive_t *drive, int arg)
* @drive: drive being configured
*
* Add the generic parts of the system settings to the /proc files.
- * The caller must not be holding the ide_setting_sem.
+ * The caller must not be holding the ide_setting_mtx.
*/
void ide_add_generic_settings (ide_drive_t *drive)
@@ -408,7 +408,7 @@ static int proc_ide_read_settings
proc_ide_settings_warn();
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n");
out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n");
while(setting) {
@@ -428,7 +428,7 @@ static int proc_ide_read_settings
setting = setting->next;
}
len = out - page;
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
}
@@ -508,16 +508,16 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer,
++p;
}
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
setting = ide_find_setting_by_name(drive, name);
if (!setting)
{
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
goto parse_error;
}
if (for_real)
ide_write_setting(drive, setting, val * setting->div_factor / setting->mul_factor);
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
}
} while (!for_real++);
free_page((unsigned long)buf);
@@ -705,7 +705,7 @@ EXPORT_SYMBOL(ide_proc_register_driver);
* Clean up the driver specific /proc files and IDE settings
* for a given drive.
*
- * Takes ide_setting_sem and ide_lock.
+ * Takes ide_setting_mtx and ide_lock.
* Caller must hold none of the locks.
*/
@@ -715,10 +715,10 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
ide_remove_proc_entries(drive->proc, driver->proc);
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
spin_lock_irqsave(&ide_lock, flags);
/*
- * ide_setting_sem protects the settings list
+ * ide_setting_mtx protects the settings list
* ide_lock protects the use of settings
*
* so we need to hold both, ide_settings_sem because we want to
@@ -726,11 +726,11 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
* a setting out that is being used.
*
* OTOH both ide_{read,write}_setting are only ever used under
- * ide_setting_sem.
+ * ide_setting_mtx.
*/
auto_remove_settings(drive);
spin_unlock_irqrestore(&ide_lock, flags);
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
}
EXPORT_SYMBOL(ide_proc_unregister_driver);
diff --git a/drivers/ide/ide-timing.h b/drivers/ide/ide-timing.h
index c0864b1e922..e6cb8593b5b 100644
--- a/drivers/ide/ide-timing.h
+++ b/drivers/ide/ide-timing.h
@@ -102,66 +102,16 @@ static struct ide_timing ide_timing[] = {
#define EZ(v,unit) ((v)?ENOUGH(v,unit):0)
#define XFER_MODE 0xf0
-#define XFER_UDMA_133 0x48
-#define XFER_UDMA_100 0x44
-#define XFER_UDMA_66 0x42
-#define XFER_UDMA 0x40
#define XFER_MWDMA 0x20
-#define XFER_SWDMA 0x10
#define XFER_EPIO 0x01
#define XFER_PIO 0x00
-static short ide_find_best_mode(ide_drive_t *drive, int map)
+static short ide_find_best_pio_mode(ide_drive_t *drive)
{
struct hd_driveid *id = drive->id;
short best = 0;
- if (!id)
- return XFER_PIO_SLOW;
-
- if ((map & XFER_UDMA) && (id->field_valid & 4)) { /* Want UDMA and UDMA bitmap valid */
-
- if ((map & XFER_UDMA_133) == XFER_UDMA_133)
- if ((best = (id->dma_ultra & 0x0040) ? XFER_UDMA_6 : 0)) return best;
-
- if ((map & XFER_UDMA_100) == XFER_UDMA_100)
- if ((best = (id->dma_ultra & 0x0020) ? XFER_UDMA_5 : 0)) return best;
-
- if ((map & XFER_UDMA_66) == XFER_UDMA_66)
- if ((best = (id->dma_ultra & 0x0010) ? XFER_UDMA_4 :
- (id->dma_ultra & 0x0008) ? XFER_UDMA_3 : 0)) return best;
-
- if ((best = (id->dma_ultra & 0x0004) ? XFER_UDMA_2 :
- (id->dma_ultra & 0x0002) ? XFER_UDMA_1 :
- (id->dma_ultra & 0x0001) ? XFER_UDMA_0 : 0)) return best;
- }
-
- if ((map & XFER_MWDMA) && (id->field_valid & 2)) { /* Want MWDMA and drive has EIDE fields */
-
- if ((best = (id->dma_mword & 0x0004) ? XFER_MW_DMA_2 :
- (id->dma_mword & 0x0002) ? XFER_MW_DMA_1 :
- (id->dma_mword & 0x0001) ? XFER_MW_DMA_0 : 0)) return best;
- }
-
- if (map & XFER_SWDMA) { /* Want SWDMA */
-
- if (id->field_valid & 2) { /* EIDE SWDMA */
-
- if ((best = (id->dma_1word & 0x0004) ? XFER_SW_DMA_2 :
- (id->dma_1word & 0x0002) ? XFER_SW_DMA_1 :
- (id->dma_1word & 0x0001) ? XFER_SW_DMA_0 : 0)) return best;
- }
-
- if (id->capability & 1) { /* Pre-EIDE style SWDMA */
-
- if ((best = (id->tDMA == 2) ? XFER_SW_DMA_2 :
- (id->tDMA == 1) ? XFER_SW_DMA_1 :
- (id->tDMA == 0) ? XFER_SW_DMA_0 : 0)) return best;
- }
- }
-
-
- if ((map & XFER_EPIO) && (id->field_valid & 2)) { /* EIDE PIO modes */
+ if (id->field_valid & 2) { /* EIDE PIO modes */
if ((best = (drive->id->eide_pio_modes & 4) ? XFER_PIO_5 :
(drive->id->eide_pio_modes & 2) ? XFER_PIO_4 :
@@ -262,7 +212,7 @@ static int ide_timing_compute(ide_drive_t *drive, short speed, struct ide_timing
*/
if ((speed & XFER_MODE) != XFER_PIO) {
- ide_timing_compute(drive, ide_find_best_mode(drive, XFER_PIO | XFER_EPIO), &p, T, UT);
+ ide_timing_compute(drive, ide_find_best_pio_mode(drive), &p, T, UT);
ide_timing_merge(&p, t, t, IDE_TIMING_ALL);
}
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 0cd76bf6683..c948a5c17a5 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -169,7 +169,7 @@ static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
static int idebus_parameter; /* holds the "idebus=" parameter */
static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */
-DECLARE_MUTEX(ide_cfg_sem);
+DEFINE_MUTEX(ide_cfg_mtx);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
#ifdef CONFIG_IDEPCI_PCIBUS_ORDER
@@ -460,6 +460,8 @@ static void ide_hwif_restore(ide_hwif_t *hwif, ide_hwif_t *tmp_hwif)
hwif->mwdma_mask = tmp_hwif->mwdma_mask;
hwif->swdma_mask = tmp_hwif->swdma_mask;
+ hwif->cbl = tmp_hwif->cbl;
+
hwif->chipset = tmp_hwif->chipset;
hwif->hold = tmp_hwif->hold;
@@ -496,8 +498,8 @@ static void ide_hwif_restore(ide_hwif_t *hwif, ide_hwif_t *tmp_hwif)
hwif->ide_dma_clear_irq = tmp_hwif->ide_dma_clear_irq;
hwif->dma_host_on = tmp_hwif->dma_host_on;
hwif->dma_host_off = tmp_hwif->dma_host_off;
- hwif->ide_dma_lostirq = tmp_hwif->ide_dma_lostirq;
- hwif->ide_dma_timeout = tmp_hwif->ide_dma_timeout;
+ hwif->dma_lost_irq = tmp_hwif->dma_lost_irq;
+ hwif->dma_timeout = tmp_hwif->dma_timeout;
hwif->OUTB = tmp_hwif->OUTB;
hwif->OUTBSYNC = tmp_hwif->OUTBSYNC;
@@ -533,7 +535,6 @@ static void ide_hwif_restore(ide_hwif_t *hwif, ide_hwif_t *tmp_hwif)
hwif->extra_base = tmp_hwif->extra_base;
hwif->extra_ports = tmp_hwif->extra_ports;
hwif->autodma = tmp_hwif->autodma;
- hwif->udma_four = tmp_hwif->udma_four;
hwif->hwif_data = tmp_hwif->hwif_data;
}
@@ -564,7 +565,7 @@ void ide_unregister(unsigned int index)
{
ide_drive_t *drive;
ide_hwif_t *hwif, *g;
- static ide_hwif_t tmp_hwif; /* protected by ide_cfg_sem */
+ static ide_hwif_t tmp_hwif; /* protected by ide_cfg_mtx */
ide_hwgroup_t *hwgroup;
int irq_count = 0, unit;
@@ -572,7 +573,7 @@ void ide_unregister(unsigned int index)
BUG_ON(in_interrupt());
BUG_ON(irqs_disabled());
- down(&ide_cfg_sem);
+ mutex_lock(&ide_cfg_mtx);
spin_lock_irq(&ide_lock);
hwif = &ide_hwifs[index];
if (!hwif->present)
@@ -679,7 +680,7 @@ void ide_unregister(unsigned int index)
abort:
spin_unlock_irq(&ide_lock);
- up(&ide_cfg_sem);
+ mutex_unlock(&ide_cfg_mtx);
}
EXPORT_SYMBOL(ide_unregister);
@@ -817,9 +818,9 @@ EXPORT_SYMBOL(ide_register_hw);
* Locks for IDE setting functionality
*/
-DECLARE_MUTEX(ide_setting_sem);
+DEFINE_MUTEX(ide_setting_mtx);
-EXPORT_SYMBOL_GPL(ide_setting_sem);
+EXPORT_SYMBOL_GPL(ide_setting_mtx);
/**
* ide_spin_wait_hwgroup - wait for group
@@ -1192,11 +1193,11 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device
}
read_val:
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
spin_lock_irqsave(&ide_lock, flags);
err = *val;
spin_unlock_irqrestore(&ide_lock, flags);
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
return err >= 0 ? put_user(err, (long __user *)arg) : err;
set_val:
@@ -1206,9 +1207,9 @@ set_val:
if (!capable(CAP_SYS_ADMIN))
err = -EACCES;
else {
- down(&ide_setting_sem);
+ mutex_lock(&ide_setting_mtx);
err = setfunc(drive, arg);
- up(&ide_setting_sem);
+ mutex_unlock(&ide_setting_mtx);
}
}
return err;
@@ -1548,7 +1549,11 @@ static int __init ide_setup(char *s)
goto bad_option;
case -7: /* ata66 */
#ifdef CONFIG_BLK_DEV_IDEPCI
- hwif->udma_four = 1;
+ /*
+ * Use ATA_CBL_PATA40_SHORT so drive side
+ * cable detection is also overriden.
+ */
+ hwif->cbl = ATA_CBL_PATA40_SHORT;
goto obsolete_option;
#else
goto bad_hwif;
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index 45ed03591cd..661c12f6dda 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -130,7 +130,7 @@ struct hd_i_struct {
#ifdef HD_TYPE
static struct hd_i_struct hd_info[] = { HD_TYPE };
-static int NR_HD = ((sizeof (hd_info))/(sizeof (struct hd_i_struct)));
+static int NR_HD = ARRAY_SIZE(hd_info);
#else
static struct hd_i_struct hd_info[MAX_HD];
static int NR_HD;
diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c
index c211fc78345..b557c45a5a9 100644
--- a/drivers/ide/legacy/macide.c
+++ b/drivers/ide/legacy/macide.c
@@ -77,15 +77,6 @@ int macide_ack_intr(ide_hwif_t* hwif)
return 0;
}
-#ifdef CONFIG_BLK_DEV_MAC_MEDIABAY
-static void macide_mediabay_interrupt(int irq, void *dev_id)
-{
- int state = baboon->mb_status & 0x04;
-
- printk(KERN_INFO "macide: media bay %s detected\n", state? "removal":"insertion");
-}
-#endif
-
/*
* Probe for a Macintosh IDE interface
*/
@@ -128,11 +119,6 @@ void macide_init(void)
ide_drive_t *drive = &ide_hwifs[index].drives[0];
drive->capacity64 = drive->cyl*drive->head*drive->sect;
-#ifdef CONFIG_BLK_DEV_MAC_MEDIABAY
- request_irq(IRQ_BABOON_2, macide_mediabay_interrupt,
- IRQ_FLG_FAST, "mediabay",
- macide_mediabay_interrupt);
-#endif
}
break;
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index ca95e990862..2e7013a2a7f 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -381,9 +381,7 @@ static int auide_dma_setup(ide_drive_t *drive)
static int auide_dma_check(ide_drive_t *drive)
{
- u8 speed;
-
-#ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
+ u8 speed = ide_max_dma_mode(drive);
if( dbdma_init_done == 0 ){
auide_hwif.white_list = ide_in_drive_list(drive->id,
@@ -394,7 +392,6 @@ static int auide_dma_check(ide_drive_t *drive)
auide_ddma_init(&auide_hwif);
dbdma_init_done = 1;
}
-#endif
/* Is the drive in our DMA black list? */
@@ -409,8 +406,6 @@ static int auide_dma_check(ide_drive_t *drive)
else
drive->using_dma = 1;
- speed = ide_find_best_mode(drive, XFER_PIO | XFER_MWDMA);
-
if (drive->autodma && (speed & XFER_MODE) != XFER_PIO)
return 0;
@@ -456,10 +451,9 @@ static void auide_dma_off_quietly(ide_drive_t *drive)
drive->using_dma = 0;
}
-static int auide_dma_lostirq(ide_drive_t *drive)
+static void auide_dma_lost_irq(ide_drive_t *drive)
{
printk(KERN_ERR "%s: IRQ lost\n", drive->name);
- return 0;
}
static void auide_ddma_tx_callback(int irq, void *param)
@@ -489,16 +483,16 @@ static void auide_init_dbdma_dev(dbdev_tab_t *dev, u32 dev_id, u32 tsize, u32 de
#if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
-static int auide_dma_timeout(ide_drive_t *drive)
+static void auide_dma_timeout(ide_drive_t *drive)
{
-// printk("%s\n", __FUNCTION__);
+ ide_hwif_t *hwif = HWIF(drive);
printk(KERN_ERR "%s: DMA timeout occurred: ", drive->name);
- if (HWIF(drive)->ide_dma_test_irq(drive))
- return 0;
+ if (hwif->ide_dma_test_irq(drive))
+ return;
- return HWIF(drive)->ide_dma_end(drive);
+ hwif->ide_dma_end(drive);
}
@@ -721,7 +715,7 @@ static int au_ide_probe(struct device *dev)
#ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
hwif->dma_off_quietly = &auide_dma_off_quietly;
- hwif->ide_dma_timeout = &auide_dma_timeout;
+ hwif->dma_timeout = &auide_dma_timeout;
hwif->ide_dma_check = &auide_dma_check;
hwif->dma_exec_cmd = &auide_dma_exec_cmd;
@@ -731,7 +725,7 @@ static int au_ide_probe(struct device *dev)
hwif->ide_dma_test_irq = &auide_dma_test_irq;
hwif->dma_host_off = &auide_dma_host_off;
hwif->dma_host_on = &auide_dma_host_on;
- hwif->ide_dma_lostirq = &auide_dma_lostirq;
+ hwif->dma_lost_irq = &auide_dma_lost_irq;
hwif->ide_dma_on = &auide_dma_on;
hwif->autodma = 1;
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index b173bc66ce1..e5d09367627 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/ide/pci/aec62xx.c Version 0.21 Apr 21, 2007
+ * linux/drivers/ide/pci/aec62xx.c Version 0.24 May 24, 2007
*
* Copyright (C) 1999-2002 Andre Hedrick <andre@linux-ide.org>
* Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com>
@@ -140,25 +140,10 @@ static int aec6260_tune_chipset (ide_drive_t *drive, u8 xferspeed)
return(ide_config_drive_speed(drive, speed));
}
-static int aec62xx_tune_chipset (ide_drive_t *drive, u8 speed)
-{
- switch (HWIF(drive)->pci_dev->device) {
- case PCI_DEVICE_ID_ARTOP_ATP865:
- case PCI_DEVICE_ID_ARTOP_ATP865R:
- case PCI_DEVICE_ID_ARTOP_ATP860:
- case PCI_DEVICE_ID_ARTOP_ATP860R:
- return ((int) aec6260_tune_chipset(drive, speed));
- case PCI_DEVICE_ID_ARTOP_ATP850UF:
- return ((int) aec6210_tune_chipset(drive, speed));
- default:
- return -1;
- }
-}
-
static void aec62xx_tune_drive (ide_drive_t *drive, u8 pio)
{
pio = ide_get_best_pio_mode(drive, pio, 4, NULL);
- (void) aec62xx_tune_chipset(drive, pio + XFER_PIO_0);
+ (void) HWIF(drive)->speedproc(drive, pio + XFER_PIO_0);
}
static int aec62xx_config_drive_xfer_rate (ide_drive_t *drive)
@@ -172,12 +157,9 @@ static int aec62xx_config_drive_xfer_rate (ide_drive_t *drive)
return -1;
}
-static int aec62xx_irq_timeout (ide_drive_t *drive)
+static void aec62xx_dma_lost_irq (ide_drive_t *drive)
{
- ide_hwif_t *hwif = HWIF(drive);
- struct pci_dev *dev = hwif->pci_dev;
-
- switch(dev->device) {
+ switch (HWIF(drive)->pci_dev->device) {
case PCI_DEVICE_ID_ARTOP_ATP860:
case PCI_DEVICE_ID_ARTOP_ATP860R:
case PCI_DEVICE_ID_ARTOP_ATP865:
@@ -186,7 +168,6 @@ static int aec62xx_irq_timeout (ide_drive_t *drive)
default:
break;
}
- return 0;
}
static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
@@ -224,64 +205,46 @@ static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const ch
static void __devinit init_hwif_aec62xx(ide_hwif_t *hwif)
{
- struct pci_dev *dev = hwif->pci_dev;
+ struct pci_dev *dev = hwif->pci_dev;
+ u8 reg54 = 0, mask = hwif->channel ? 0xf0 : 0x0f;
+ unsigned long flags;
- hwif->autodma = 0;
hwif->tuneproc = &aec62xx_tune_drive;
- hwif->speedproc = &aec62xx_tune_chipset;
- if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF)
- hwif->serialized = hwif->channel;
-
- if (hwif->mate)
- hwif->mate->serialized = hwif->serialized;
+ if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF) {
+ if(hwif->mate)
+ hwif->mate->serialized = hwif->serialized = 1;
+ hwif->speedproc = &aec6210_tune_chipset;
+ } else
+ hwif->speedproc = &aec6260_tune_chipset;
if (!hwif->dma_base) {
- hwif->drives[0].autotune = 1;
- hwif->drives[1].autotune = 1;
+ hwif->drives[0].autotune = hwif->drives[1].autotune = 1;
return;
}
hwif->ultra_mask = hwif->cds->udma_mask;
-
- /* atp865 and atp865r */
- if (hwif->ultra_mask == 0x3f) {
- /* check bit 0x10 of DMA status register */
- if (inb(pci_resource_start(dev, 4) + 2) & 0x10)
- hwif->ultra_mask = 0x7f; /* udma0-6 */
- }
-
hwif->mwdma_mask = 0x07;
hwif->ide_dma_check = &aec62xx_config_drive_xfer_rate;
- hwif->ide_dma_lostirq = &aec62xx_irq_timeout;
-
- if (!noautodma)
- hwif->autodma = 1;
- hwif->drives[0].autodma = hwif->autodma;
- hwif->drives[1].autodma = hwif->autodma;
-}
-
-static void __devinit init_dma_aec62xx(ide_hwif_t *hwif, unsigned long dmabase)
-{
- struct pci_dev *dev = hwif->pci_dev;
+ hwif->dma_lost_irq = &aec62xx_dma_lost_irq;
if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF) {
- u8 reg54h = 0;
- unsigned long flags;
-
spin_lock_irqsave(&ide_lock, flags);
- pci_read_config_byte(dev, 0x54, &reg54h);
- pci_write_config_byte(dev, 0x54, reg54h & ~(hwif->channel ? 0xF0 : 0x0F));
+ pci_read_config_byte (dev, 0x54, &reg54);
+ pci_write_config_byte(dev, 0x54, (reg54 & ~mask));
spin_unlock_irqrestore(&ide_lock, flags);
- } else {
- u8 ata66 = 0;
+ } else if (hwif->cbl != ATA_CBL_PATA40_SHORT) {
+ u8 ata66 = 0, mask = hwif->channel ? 0x02 : 0x01;
+
pci_read_config_byte(hwif->pci_dev, 0x49, &ata66);
- if (!(hwif->udma_four))
- hwif->udma_four = (ata66&(hwif->channel?0x02:0x01))?0:1;
+
+ hwif->cbl = (ata66 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
}
- ide_setup_dma(hwif, dmabase, 8);
+ if (!noautodma)
+ hwif->autodma = 1;
+ hwif->drives[0].autodma = hwif->drives[1].autodma = hwif->autodma;
}
static int __devinit init_setup_aec62xx(struct pci_dev *dev, ide_pci_device_t *d)
@@ -291,16 +254,12 @@ static int __devinit init_setup_aec62xx(struct pci_dev *dev, ide_pci_device_t *d
static int __devinit init_setup_aec6x80(struct pci_dev *dev, ide_pci_device_t *d)
{
- unsigned long bar4reg = pci_resource_start(dev, 4);
-
- if (inb(bar4reg+2) & 0x10) {
- strcpy(d->name, "AEC6880");
- if (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R)
- strcpy(d->name, "AEC6880R");
- } else {
- strcpy(d->name, "AEC6280");
- if (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R)
- strcpy(d->name, "AEC6280R");
+ unsigned long dma_base = pci_resource_start(dev, 4);
+
+ if (inb(dma_base + 2) & 0x10) {
+ d->name = (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R) ?
+ "AEC6880R" : "AEC6880";
+ d->udma_mask = 0x7f; /* udma0-6 */
}
return ide_setup_pci_device(dev, d);
@@ -312,7 +271,6 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = {
.init_setup = init_setup_aec62xx,
.init_chipset = init_chipset_aec62xx,
.init_hwif = init_hwif_aec62xx,
- .init_dma = init_dma_aec62xx,
.channels = 2,
.autodma = AUTODMA,
.enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
@@ -323,7 +281,6 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = {
.init_setup = init_setup_aec62xx,
.init_chipset = init_chipset_aec62xx,
.init_hwif = init_hwif_aec62xx,
- .init_dma = init_dma_aec62xx,
.channels = 2,
.autodma = NOAUTODMA,
.bootable = OFF_BOARD,
@@ -333,28 +290,25 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = {
.init_setup = init_setup_aec62xx,
.init_chipset = init_chipset_aec62xx,
.init_hwif = init_hwif_aec62xx,
- .init_dma = init_dma_aec62xx,
.channels = 2,
.autodma = AUTODMA,
.enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
.bootable = NEVER_BOARD,
.udma_mask = 0x1f, /* udma0-4 */
},{ /* 3 */
- .name = "AEC6X80",
+ .name = "AEC6280",
.init_setup = init_setup_aec6x80,
.init_chipset = init_chipset_aec62xx,
.init_hwif = init_hwif_aec62xx,
- .init_dma = init_dma_aec62xx,
.channels = 2,
.autodma = AUTODMA,
.bootable = OFF_BOARD,
.udma_mask = 0x3f, /* udma0-5 */
},{ /* 4 */
- .name = "AEC6X80R",
+ .name = "AEC6280R",
.init_setup = init_setup_aec6x80,
.init_chipset = init_chipset_aec62xx,
.init_hwif = init_hwif_aec62xx,
- .init_dma = init_dma_aec62xx,
.channels = 2,
.autodma = AUTODMA,
.enablebits = {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
@@ -370,13 +324,16 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = {
*
* Called when the PCI registration layer (or the IDE initialization)
* finds a device matching our IDE device tables.
+ *
+ * NOTE: since we're going to modify the 'name' field for AEC-6[26]80[R]
+ * chips, pass a local copy of 'struct pci_device_id' down the call chain.
*/
static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_device_id *id)
{
- ide_pci_device_t *d = &aec62xx_chipsets[id->driver_data];
+ ide_pci_device_t d = aec62xx_chipsets[id->driver_data];
- return d->init_setup(dev, d);
+ return d.init_setup(dev, &d);
}
static struct pci_device_id aec62xx_pci_tbl[] = {
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index 27525ec2e19..8a6b27b3bcc 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/ide/pci/alim15x3.c Version 0.21 2007/02/03
+ * linux/drivers/ide/pci/alim15x3.c Version 0.25 Jun 9 2007
*
* Copyright (C) 1998-2000 Michel Aubry, Maintainer
* Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer
@@ -10,6 +10,7 @@
* Copyright (C) 2002 Alan Cox <alan@redhat.com>
* ALi (now ULi M5228) support by Clear Zhang <Clear.Zhang@ali.com.tw>
* Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com>
+ * Copyright (C) 2007 Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
*
* (U)DMA capable version of ali 1533/1543(C), 1535(D)
*
@@ -36,6 +37,7 @@
#include <linux/hdreg.h>
#include <linux/ide.h>
#include <linux/init.h>
+#include <linux/dmi.h>
#include <asm/io.h>
@@ -583,6 +585,35 @@ out:
return 0;
}
+/*
+ * Cable special cases
+ */
+
+static struct dmi_system_id cable_dmi_table[] = {
+ {
+ .ident = "HP Pavilion N5430",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_BOARD_NAME, "OmniBook N32N-736"),
+ },
+ },
+ { }
+};
+
+static int ali_cable_override(struct pci_dev *pdev)
+{
+ /* Fujitsu P2000 */
+ if (pdev->subsystem_vendor == 0x10CF &&
+ pdev->subsystem_device == 0x10AF)
+ return 1;
+
+ /* Systems by DMI */
+ if (dmi_check_system(cable_dmi_table))
+ return 1;
+
+ return 0;
+}
+
/**
* ata66_ali15x3 - check for UDMA 66 support
* @hwif: IDE interface
@@ -594,37 +625,31 @@ out:
* FIXME: frobs bits that are not defined on newer ALi devicea
*/
-static unsigned int __devinit ata66_ali15x3 (ide_hwif_t *hwif)
+static u8 __devinit ata66_ali15x3(ide_hwif_t *hwif)
{
struct pci_dev *dev = hwif->pci_dev;
- unsigned int ata66 = 0;
- u8 cable_80_pin[2] = { 0, 0 };
-
unsigned long flags;
- u8 tmpbyte;
+ u8 cbl = ATA_CBL_PATA40, tmpbyte;
local_irq_save(flags);
if (m5229_revision >= 0xC2) {
/*
- * Ultra66 cable detection (from Host View)
- * m5229, 0x4a, bit0: primary, bit1: secondary 80 pin
- */
- pci_read_config_byte(dev, 0x4a, &tmpbyte);
- /*
- * 0x4a, bit0 is 0 => primary channel
- * has 80-pin (from host view)
- */
- if (!(tmpbyte & 0x01)) cable_80_pin[0] = 1;
- /*
- * 0x4a, bit1 is 0 => secondary channel
- * has 80-pin (from host view)
- */
- if (!(tmpbyte & 0x02)) cable_80_pin[1] = 1;
- /*
- * Allow ata66 if cable of current channel has 80 pins
+ * m5229 80-pin cable detection (from Host View)
+ *
+ * 0x4a bit0 is 0 => primary channel has 80-pin
+ * 0x4a bit1 is 0 => secondary channel has 80-pin
+ *
+ * Certain laptops use short but suitable cables
+ * and don't implement the detect logic.
*/
- ata66 = (hwif->channel)?cable_80_pin[1]:cable_80_pin[0];
+ if (ali_cable_override(dev))
+ cbl = ATA_CBL_PATA40_SHORT;
+ else {
+ pci_read_config_byte(dev, 0x4a, &tmpbyte);
+ if ((tmpbyte & (1 << hwif->channel)) == 0)
+ cbl = ATA_CBL_PATA80;
+ }
} else {
/*
* check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010
@@ -657,7 +682,7 @@ static unsigned int __devinit ata66_ali15x3 (ide_hwif_t *hwif)
local_irq_restore(flags);
- return(ata66);
+ return cbl;
}
/**
@@ -708,8 +733,9 @@ static void __devinit init_hwif_common_ali15x3 (ide_hwif_t *hwif)
hwif->dma_setup = &ali15x3_dma_setup;
if (!noautodma)
hwif->autodma = 1;
- if (!(hwif->udma_four))
- hwif->udma_four = ata66_ali15x3(hwif);
+
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = ata66_ali15x3(hwif);
}
hwif->drives[0].autodma = hwif->autodma;
hwif->drives[1].autodma = hwif->autodma;
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index a2be65fcf89..84ed30cdb32 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -1,10 +1,11 @@
/*
- * Version 2.16
+ * Version 2.20
*
* AMD 755/756/766/8111 and nVidia nForce/2/2s/3/3s/CK804/MCP04
* IDE driver for Linux.
*
* Copyright (c) 2000-2002 Vojtech Pavlik
+ * Copyright (c) 2007 Bartlomiej Zolnierkiewicz
*
* Based on the work of:
* Andre Hedrick
@@ -37,11 +38,6 @@
#define AMD_ADDRESS_SETUP (0x0c + amd_config->base)
#define AMD_UDMA_TIMING (0x10 + amd_config->base)
-#define AMD_UDMA 0x07
-#define AMD_UDMA_33 0x01
-#define AMD_UDMA_66 0x02
-#define AMD_UDMA_100 0x03
-#define AMD_UDMA_133 0x04
#define AMD_CHECK_SWDMA 0x08
#define AMD_BAD_SWDMA 0x10
#define AMD_BAD_FIFO 0x20
@@ -53,32 +49,33 @@
static struct amd_ide_chip {
unsigned short id;
- unsigned long base;
- unsigned char flags;
+ u8 base;
+ u8 udma_mask;
+ u8 flags;
} amd_ide_chips[] = {
- { PCI_DEVICE_ID_AMD_COBRA_7401, 0x40, AMD_UDMA_33 | AMD_BAD_SWDMA },
- { PCI_DEVICE_ID_AMD_VIPER_7409, 0x40, AMD_UDMA_66 | AMD_CHECK_SWDMA },
- { PCI_DEVICE_ID_AMD_VIPER_7411, 0x40, AMD_UDMA_100 | AMD_BAD_FIFO },
- { PCI_DEVICE_ID_AMD_OPUS_7441, 0x40, AMD_UDMA_100 },
- { PCI_DEVICE_ID_AMD_8111_IDE, 0x40, AMD_UDMA_133 | AMD_CHECK_SERENADE },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_IDE, 0x50, AMD_UDMA_100 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE, 0x50, AMD_UDMA_133 },
- { PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, AMD_UDMA_100 },
+ { PCI_DEVICE_ID_AMD_COBRA_7401, 0x40, ATA_UDMA2, AMD_BAD_SWDMA },
+ { PCI_DEVICE_ID_AMD_VIPER_7409, 0x40, ATA_UDMA4, AMD_CHECK_SWDMA },
+ { PCI_DEVICE_ID_AMD_VIPER_7411, 0x40, ATA_UDMA5, AMD_BAD_FIFO },
+ { PCI_DEVICE_ID_AMD_OPUS_7441, 0x40, ATA_UDMA5, },
+ { PCI_DEVICE_ID_AMD_8111_IDE, 0x40, ATA_UDMA6, AMD_CHECK_SERENADE },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_IDE, 0x50, ATA_UDMA5, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE, 0x50, ATA_UDMA6, },
+ { PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, ATA_UDMA5, },
{ 0 }
};
@@ -87,7 +84,7 @@ static ide_pci_device_t *amd_chipset;
static unsigned int amd_80w;
static unsigned int amd_clock;
-static char *amd_dma[] = { "MWDMA16", "UDMA33", "UDMA66", "UDMA100", "UDMA133" };
+static char *amd_dma[] = { "16", "25", "33", "44", "66", "100", "133" };
static unsigned char amd_cyc2udma[] = { 6, 6, 5, 4, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 7 };
/*
@@ -128,7 +125,7 @@ static int amd74xx_get_info(char *buffer, char **addr, off_t offset, int count)
pci_read_config_byte(dev, PCI_REVISION_ID, &t);
amd_print("Revision: IDE %#x", t);
- amd_print("Highest DMA rate: %s", amd_dma[amd_config->flags & AMD_UDMA]);
+ amd_print("Highest DMA rate: UDMA%s", amd_dma[fls(amd_config->udma_mask) - 1]);
amd_print("BM-DMA base: %#lx", amd_base);
amd_print("PCI clock: %d.%dMHz", amd_clock / 1000, amd_clock / 100 % 10);
@@ -221,12 +218,12 @@ static void amd_set_speed(struct pci_dev *dev, unsigned char dn, struct ide_timi
pci_write_config_byte(dev, AMD_DRIVE_TIMING + (3 - dn),
((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1));
- switch (amd_config->flags & AMD_UDMA) {
- case AMD_UDMA_33: t = timing->udma ? (0xc0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break;
- case AMD_UDMA_66: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 2, 10)]) : 0x03; break;
- case AMD_UDMA_100: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 10)]) : 0x03; break;
- case AMD_UDMA_133: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 15)]) : 0x03; break;
- default: return;
+ switch (amd_config->udma_mask) {
+ case ATA_UDMA2: t = timing->udma ? (0xc0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break;
+ case ATA_UDMA4: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 2, 10)]) : 0x03; break;
+ case ATA_UDMA5: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 10)]) : 0x03; break;
+ case ATA_UDMA6: t = timing->udma ? (0xc0 | amd_cyc2udma[FIT(timing->udma, 1, 15)]) : 0x03; break;
+ default: return;
}
pci_write_config_byte(dev, AMD_UDMA_TIMING + (3 - dn), t);
@@ -248,7 +245,7 @@ static int amd_set_drive(ide_drive_t *drive, u8 speed)
ide_config_drive_speed(drive, speed);
T = 1000000000 / amd_clock;
- UT = T / min_t(int, max_t(int, amd_config->flags & AMD_UDMA, 1), 2);
+ UT = (amd_config->udma_mask == ATA_UDMA2) ? T : (T / 2);
ide_timing_compute(drive, speed, &t, T, UT);
@@ -277,29 +274,19 @@ static int amd_set_drive(ide_drive_t *drive, u8 speed)
static void amd74xx_tune_drive(ide_drive_t *drive, u8 pio)
{
if (pio == 255) {
- amd_set_drive(drive, ide_find_best_mode(drive, XFER_PIO | XFER_EPIO));
+ amd_set_drive(drive, ide_find_best_pio_mode(drive));
return;
}
amd_set_drive(drive, XFER_PIO_0 + min_t(byte, pio, 5));
}
-/*
- * amd74xx_dmaproc() is a callback from upper layers that can do
- * a lot, but we use it for DMA/PIO tuning only, delegating everything
- * else to the default ide_dmaproc().
- */
-
static int amd74xx_ide_dma_check(ide_drive_t *drive)
{
- int w80 = HWIF(drive)->udma_four;
+ u8 speed = ide_max_dma_mode(drive);
- u8 speed = ide_find_best_mode(drive,
- XFER_PIO | XFER_EPIO | XFER_MWDMA | XFER_UDMA |
- ((amd_config->flags & AMD_BAD_SWDMA) ? 0 : XFER_SWDMA) |
- (w80 && (amd_config->flags & AMD_UDMA) >= AMD_UDMA_66 ? XFER_UDMA_66 : 0) |
- (w80 && (amd_config->flags & AMD_UDMA) >= AMD_UDMA_100 ? XFER_UDMA_100 : 0) |
- (w80 && (amd_config->flags & AMD_UDMA) >= AMD_UDMA_133 ? XFER_UDMA_133 : 0));
+ if (speed == 0)
+ speed = ide_find_best_pio_mode(drive);
amd_set_drive(drive, speed);
@@ -334,10 +321,10 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch
* Check 80-wire cable presence.
*/
- switch (amd_config->flags & AMD_UDMA) {
+ switch (amd_config->udma_mask) {
- case AMD_UDMA_133:
- case AMD_UDMA_100:
+ case ATA_UDMA6:
+ case ATA_UDMA5:
pci_read_config_byte(dev, AMD_CABLE_DETECT, &t);
pci_read_config_dword(dev, AMD_UDMA_TIMING, &u);
amd_80w = ((t & 0x3) ? 1 : 0) | ((t & 0xc) ? 2 : 0);
@@ -349,7 +336,7 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch
}
break;
- case AMD_UDMA_66:
+ case ATA_UDMA4:
/* no host side cable detection */
amd_80w = 0x03;
break;
@@ -370,7 +357,7 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch
if ((amd_config->flags & AMD_CHECK_SERENADE) &&
dev->subsystem_vendor == PCI_VENDOR_ID_AMD &&
dev->subsystem_device == PCI_DEVICE_ID_AMD_SERENADE)
- amd_config->flags = AMD_UDMA_100;
+ amd_config->udma_mask = ATA_UDMA5;
/*
* Determine the system bus clock.
@@ -395,8 +382,9 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch
*/
pci_read_config_byte(dev, PCI_REVISION_ID, &t);
- printk(KERN_INFO "%s: %s (rev %02x) %s controller\n",
- amd_chipset->name, pci_name(dev), t, amd_dma[amd_config->flags & AMD_UDMA]);
+ printk(KERN_INFO "%s: %s (rev %02x) UDMA%s controller\n",
+ amd_chipset->name, pci_name(dev), t,
+ amd_dma[fls(amd_config->udma_mask) - 1]);
/*
* Register /proc/ide/amd74xx entry
@@ -437,12 +425,19 @@ static void __devinit init_hwif_amd74xx(ide_hwif_t *hwif)
return;
hwif->atapi_dma = 1;
- hwif->ultra_mask = 0x7f;
- hwif->mwdma_mask = 0x07;
- hwif->swdma_mask = 0x07;
- if (!hwif->udma_four)
- hwif->udma_four = (amd_80w >> hwif->channel) & 1;
+ hwif->ultra_mask = amd_config->udma_mask;
+ hwif->mwdma_mask = 0x07;
+ if ((amd_config->flags & AMD_BAD_SWDMA) == 0)
+ hwif->swdma_mask = 0x07;
+
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT) {
+ if ((amd_80w >> hwif->channel) & 1)
+ hwif->cbl = ATA_CBL_PATA80;
+ else
+ hwif->cbl = ATA_CBL_PATA40;
+ }
+
hwif->ide_dma_check = &amd74xx_ide_dma_check;
if (!noautodma)
hwif->autodma = 1;
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index 8ab33faf6f7..2761510309b 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -264,10 +264,11 @@ static void __devinit init_hwif_atiixp(ide_hwif_t *hwif)
hwif->swdma_mask = 0x04;
pci_read_config_byte(pdev, ATIIXP_IDE_UDMA_MODE + ch, &udma_mode);
+
if ((udma_mode & 0x07) >= 0x04 || (udma_mode & 0x70) >= 0x40)
- hwif->udma_four = 1;
+ hwif->cbl = ATA_CBL_PATA80;
else
- hwif->udma_four = 0;
+ hwif->cbl = ATA_CBL_PATA40;
hwif->dma_host_on = &atiixp_dma_host_on;
hwif->dma_host_off = &atiixp_dma_host_off;
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index 7c57dc696f5..8631b6c8aa1 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/ide/pci/cmd64x.c Version 1.47 Mar 19, 2007
+ * linux/drivers/ide/pci/cmd64x.c Version 1.50 May 10, 2007
*
* cmd64x.c: Enable interrupts at initialization time on Ultra/PCI machines.
* Due to massive hardware bugs, UltraDMA is only supported
@@ -52,9 +52,6 @@
#define ARTTIM23_DIS_RA2 0x04
#define ARTTIM23_DIS_RA3 0x08
#define ARTTIM23_INTR_CH1 0x10
-#define ARTTIM2 0x57
-#define ARTTIM3 0x57
-#define DRWTIM23 0x58
#define DRWTIM2 0x58
#define BRST 0x59
#define DRWTIM3 0x5b
@@ -469,71 +466,43 @@ static int cmd646_1_ide_dma_end (ide_drive_t *drive)
static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const char *name)
{
- u32 class_rev = 0;
u8 mrdmode = 0;
- pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
- class_rev &= 0xff;
+ if (dev->device == PCI_DEVICE_ID_CMD_646) {
+ u8 rev = 0;
- switch(dev->device) {
- case PCI_DEVICE_ID_CMD_643:
- break;
- case PCI_DEVICE_ID_CMD_646:
- printk(KERN_INFO "%s: chipset revision 0x%02X, ", name, class_rev);
- switch(class_rev) {
- case 0x07:
- case 0x05:
- printk("UltraDMA Capable");
- break;
- case 0x03:
- printk("MultiWord DMA Force Limited");
- break;
- case 0x01:
- default:
- printk("MultiWord DMA Limited, IRQ workaround enabled");
- break;
- }
- printk("\n");
- break;
- case PCI_DEVICE_ID_CMD_648:
- case PCI_DEVICE_ID_CMD_649:
+ pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
+
+ switch (rev) {
+ case 0x07:
+ case 0x05:
+ printk("%s: UltraDMA capable", name);
break;
+ case 0x03:
default:
+ printk("%s: MultiWord DMA force limited", name);
+ break;
+ case 0x01:
+ printk("%s: MultiWord DMA limited, "
+ "IRQ workaround enabled\n", name);
break;
+ }
}
/* Set a good latency timer and cache line size value. */
(void) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64);
/* FIXME: pci_set_master() to ensure a good latency timer value */
- /* Setup interrupts. */
- (void) pci_read_config_byte(dev, MRDMODE, &mrdmode);
- mrdmode &= ~(0x30);
- (void) pci_write_config_byte(dev, MRDMODE, mrdmode);
-
- /* Use MEMORY READ LINE for reads.
- * NOTE: Although not mentioned in the PCI0646U specs,
- * these bits are write only and won't be read
- * back as set or not. The PCI0646U2 specs clarify
- * this point.
+ /*
+ * Enable interrupts, select MEMORY READ LINE for reads.
+ *
+ * NOTE: although not mentioned in the PCI0646U specs,
+ * bits 0-1 are write only and won't be read back as
+ * set or not -- PCI0646U2 specs clarify this point.
*/
- (void) pci_write_config_byte(dev, MRDMODE, mrdmode | 0x02);
-
- /* Set reasonable active/recovery/address-setup values. */
- (void) pci_write_config_byte(dev, ARTTIM0, 0x40);
- (void) pci_write_config_byte(dev, DRWTIM0, 0x3f);
- (void) pci_write_config_byte(dev, ARTTIM1, 0x40);
- (void) pci_write_config_byte(dev, DRWTIM1, 0x3f);
-#ifdef __i386__
- (void) pci_write_config_byte(dev, ARTTIM23, 0x1c);
-#else
- (void) pci_write_config_byte(dev, ARTTIM23, 0x5c);
-#endif
- (void) pci_write_config_byte(dev, DRWTIM23, 0x3f);
- (void) pci_write_config_byte(dev, DRWTIM3, 0x3f);
-#ifdef CONFIG_PPC
- (void) pci_write_config_byte(dev, UDIDETCR0, 0xf0);
-#endif /* CONFIG_PPC */
+ (void) pci_read_config_byte (dev, MRDMODE, &mrdmode);
+ mrdmode &= ~0x30;
+ (void) pci_write_config_byte(dev, MRDMODE, (mrdmode | 0x02));
#if defined(DISPLAY_CMD64X_TIMINGS) && defined(CONFIG_IDE_PROC_FS)
@@ -548,29 +517,27 @@ static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev, const cha
return 0;
}
-static unsigned int __devinit ata66_cmd64x(ide_hwif_t *hwif)
+static u8 __devinit ata66_cmd64x(ide_hwif_t *hwif)
{
- u8 ata66 = 0, mask = (hwif->channel) ? 0x02 : 0x01;
+ struct pci_dev *dev = hwif->pci_dev;
+ u8 bmidecsr = 0, mask = hwif->channel ? 0x02 : 0x01;
- switch(hwif->pci_dev->device) {
- case PCI_DEVICE_ID_CMD_643:
- case PCI_DEVICE_ID_CMD_646:
- return ata66;
- default:
- break;
+ switch (dev->device) {
+ case PCI_DEVICE_ID_CMD_648:
+ case PCI_DEVICE_ID_CMD_649:
+ pci_read_config_byte(dev, BMIDECSR, &bmidecsr);
+ return (bmidecsr & mask) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
+ default:
+ return ATA_CBL_PATA40;
}
- pci_read_config_byte(hwif->pci_dev, BMIDECSR, &ata66);
- return (ata66 & mask) ? 1 : 0;
}
static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif)
{
struct pci_dev *dev = hwif->pci_dev;
- unsigned int class_rev;
+ u8 rev = 0;
- hwif->autodma = 0;
- pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
- class_rev &= 0xff;
+ pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
hwif->tuneproc = &cmd64x_tune_drive;
hwif->speedproc = &cmd64x_tune_chipset;
@@ -580,8 +547,8 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif)
if (!hwif->dma_base)
return;
- hwif->atapi_dma = 1;
-
+ hwif->atapi_dma = 1;
+ hwif->mwdma_mask = 0x07;
hwif->ultra_mask = hwif->cds->udma_mask;
/*
@@ -596,16 +563,15 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif)
*
* So we only do UltraDMA on revision 0x05 and 0x07 chipsets.
*/
- if (dev->device == PCI_DEVICE_ID_CMD_646 && class_rev < 5)
+ if (dev->device == PCI_DEVICE_ID_CMD_646 && rev < 5)
hwif->ultra_mask = 0x00;
- hwif->mwdma_mask = 0x07;
-
hwif->ide_dma_check = &cmd64x_config_drive_for_dma;
- if (!(hwif->udma_four))
- hwif->udma_four = ata66_cmd64x(hwif);
- switch(dev->device) {
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = ata66_cmd64x(hwif);
+
+ switch (dev->device) {
case PCI_DEVICE_ID_CMD_648:
case PCI_DEVICE_ID_CMD_649:
alt_irq_bits:
@@ -614,10 +580,10 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif)
break;
case PCI_DEVICE_ID_CMD_646:
hwif->chipset = ide_cmd646;
- if (class_rev == 0x01) {
+ if (rev == 0x01) {
hwif->ide_dma_end = &cmd646_1_ide_dma_end;
break;
- } else if (class_rev >= 0x03)
+ } else if (rev >= 0x03)
goto alt_irq_bits;
/* fall thru */
default:
@@ -626,11 +592,9 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif)
break;
}
-
if (!noautodma)
hwif->autodma = 1;
- hwif->drives[0].autodma = hwif->autodma;
- hwif->drives[1].autodma = hwif->autodma;
+ hwif->drives[0].autodma = hwif->drives[1].autodma = hwif->autodma;
}
static int __devinit init_setup_cmd64x(struct pci_dev *dev, ide_pci_device_t *d)
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index 41925c47ef0..10f61f38243 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -187,7 +187,8 @@ static u8 __devinit cs5535_cable_detect(struct pci_dev *dev)
/* if a 80 wire cable was detected */
pci_read_config_byte(dev, CS5535_CABLE_DETECT, &bit);
- return (bit & 1);
+
+ return (bit & 1) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
}
/****
@@ -212,8 +213,7 @@ static void __devinit init_hwif_cs5535(ide_hwif_t *hwif)
hwif->ultra_mask = 0x1F;
hwif->mwdma_mask = 0x07;
-
- hwif->udma_four = cs5535_cable_detect(hwif->pci_dev);
+ hwif->cbl = cs5535_cable_detect(hwif->pci_dev);
if (!noautodma)
hwif->autodma = 1;
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index c33d0b0f11c..4b6bae8eee8 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/ide/pci/hpt366.c Version 1.06 Jun 27, 2007
+ * linux/drivers/ide/pci/hpt366.c Version 1.10 Jun 29, 2007
*
* Copyright (C) 1999-2003 Andre Hedrick <andre@linux-ide.org>
* Portions Copyright (C) 2001 Sun Microsystems, Inc.
@@ -77,7 +77,7 @@
* since they may tamper with its fields
* - prefix the driver startup messages with the real chip name
* - claim the extra 240 bytes of I/O space for all chips
- * - optimize the rate masking/filtering and the drive list lookup code
+ * - optimize the UltraDMA filtering and the drive list lookup code
* - use pci_get_slot() to get to the function 1 of HPT36x/374
* - cache offset of the channel's misc. control registers (MCRs) being used
* throughout the driver
@@ -99,9 +99,9 @@
* stop duplicating it for each channel by storing the pointer in the pci_dev
* structure: first, at the init_setup stage, point it to a static "template"
* with only the chip type and its specific base DPLL frequency, the highest
- * supported DMA mode, and the chip settings table pointer filled, then, at
- * the init_chipset stage, allocate per-chip instance and fill it with the
- * rest of the necessary information
+ * UltraDMA mode, and the chip settings table pointer filled, then, at the
+ * init_chipset stage, allocate per-chip instance and fill it with the rest
+ * of the necessary information
* - get rid of the constant thresholds in the HPT37x PCI clock detection code,
* switch to calculating PCI clock frequency based on the chip's base DPLL
* frequency
@@ -112,6 +112,7 @@
* also fixing the interchanged 25/40 MHz PCI clock cases for HPT36x chips;
* unify HPT36x/37x timing setup code and the speedproc handlers by joining
* the register setting lists into the table indexed by the clock selected
+ * - set the correct hwif->ultra_mask for each individual chip
* Sergei Shtylyov, <sshtylyov@ru.mvista.com> or <source@mvista.com>
*/
@@ -391,7 +392,7 @@ enum ata_clock {
struct hpt_info {
u8 chip_type; /* Chip type */
- u8 max_mode; /* Speeds allowed */
+ u8 max_ultra; /* Max. UltraDMA mode allowed */
u8 dpll_clk; /* DPLL clock in MHz */
u8 pci_clk; /* PCI clock in MHz */
u32 **settings; /* Chipset settings table */
@@ -430,77 +431,77 @@ static u32 *hpt37x_settings[NUM_ATA_CLOCKS] = {
static struct hpt_info hpt36x __devinitdata = {
.chip_type = HPT36x,
- .max_mode = (HPT366_ALLOW_ATA66_4 || HPT366_ALLOW_ATA66_3) ? 2 : 1,
+ .max_ultra = HPT366_ALLOW_ATA66_3 ? (HPT366_ALLOW_ATA66_4 ? 4 : 3) : 2,
.dpll_clk = 0, /* no DPLL */
.settings = hpt36x_settings
};
static struct hpt_info hpt370 __devinitdata = {
.chip_type = HPT370,
- .max_mode = HPT370_ALLOW_ATA100_5 ? 3 : 2,
+ .max_ultra = HPT370_ALLOW_ATA100_5 ? 5 : 4,
.dpll_clk = 48,
.settings = hpt37x_settings
};
static struct hpt_info hpt370a __devinitdata = {
.chip_type = HPT370A,
- .max_mode = HPT370_ALLOW_ATA100_5 ? 3 : 2,
+ .max_ultra = HPT370_ALLOW_ATA100_5 ? 5 : 4,
.dpll_clk = 48,
.settings = hpt37x_settings
};
static struct hpt_info hpt374 __devinitdata = {
.chip_type = HPT374,
- .max_mode = 3,
+ .max_ultra = 5,
.dpll_clk = 48,
.settings = hpt37x_settings
};
static struct hpt_info hpt372 __devinitdata = {
.chip_type = HPT372,
- .max_mode = HPT372_ALLOW_ATA133_6 ? 4 : 3,
+ .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5,
.dpll_clk = 55,
.settings = hpt37x_settings
};
static struct hpt_info hpt372a __devinitdata = {
.chip_type = HPT372A,
- .max_mode = HPT372_ALLOW_ATA133_6 ? 4 : 3,
+ .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5,
.dpll_clk = 66,
.settings = hpt37x_settings
};
static struct hpt_info hpt302 __devinitdata = {
.chip_type = HPT302,
- .max_mode = HPT302_ALLOW_ATA133_6 ? 4 : 3,
+ .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5,
.dpll_clk = 66,
.settings = hpt37x_settings
};
static struct hpt_info hpt371 __devinitdata = {
.chip_type = HPT371,
- .max_mode = HPT371_ALLOW_ATA133_6 ? 4 : 3,
+ .max_ultra = HPT371_ALLOW_ATA133_6 ? 6 : 5,
.dpll_clk = 66,
.settings = hpt37x_settings
};
static struct hpt_info hpt372n __devinitdata = {
.chip_type = HPT372N,
- .max_mode = HPT372_ALLOW_ATA133_6 ? 4 : 3,
+ .max_ultra = HPT372_ALLOW_ATA133_6 ? 6 : 5,
.dpll_clk = 77,
.settings = hpt37x_settings
};
static struct hpt_info hpt302n __devinitdata = {
.chip_type = HPT302N,
- .max_mode = HPT302_ALLOW_ATA133_6 ? 4 : 3,
+ .max_ultra = HPT302_ALLOW_ATA133_6 ? 6 : 5,
.dpll_clk = 77,
.settings = hpt37x_settings
};
static struct hpt_info hpt371n __devinitdata = {
.chip_type = HPT371N,
- .max_mode = HPT371_ALLOW_ATA133_6 ? 4 : 3,
+ .max_ultra = HPT371_ALLOW_ATA133_6 ? 6 : 5,
.dpll_clk = 77,
.settings = hpt37x_settings
};
@@ -523,53 +524,38 @@ static int check_in_drive_list(ide_drive_t *drive, const char **list)
static u8 hpt3xx_udma_filter(ide_drive_t *drive)
{
struct hpt_info *info = pci_get_drvdata(HWIF(drive)->pci_dev);
- u8 chip_type = info->chip_type;
- u8 mode = info->max_mode;
u8 mask;
- switch (mode) {
- case 0x04:
- mask = 0x7f;
- break;
- case 0x03:
+ switch (info->chip_type) {
+ case HPT370A:
+ if (!HPT370_ALLOW_ATA100_5 ||
+ check_in_drive_list(drive, bad_ata100_5))
+ return 0x1f;
+ else
+ return 0x3f;
+ case HPT370:
+ if (!HPT370_ALLOW_ATA100_5 ||
+ check_in_drive_list(drive, bad_ata100_5))
+ mask = 0x1f;
+ else
mask = 0x3f;
- if (chip_type >= HPT374)
- break;
- if (!check_in_drive_list(drive, bad_ata100_5))
- goto check_bad_ata33;
- /* fall thru */
- case 0x02:
+ break;
+ case HPT36x:
+ if (!HPT366_ALLOW_ATA66_4 ||
+ check_in_drive_list(drive, bad_ata66_4))
+ mask = 0x0f;
+ else
mask = 0x1f;
- /*
- * CHECK ME, Does this need to be changed to HPT374 ??
- */
- if (chip_type >= HPT370)
- goto check_bad_ata33;
- if (HPT366_ALLOW_ATA66_4 &&
- !check_in_drive_list(drive, bad_ata66_4))
- goto check_bad_ata33;
-
- mask = 0x0f;
- if (HPT366_ALLOW_ATA66_3 &&
- !check_in_drive_list(drive, bad_ata66_3))
- goto check_bad_ata33;
- /* fall thru */
- case 0x01:
+ if (!HPT366_ALLOW_ATA66_3 ||
+ check_in_drive_list(drive, bad_ata66_3))
mask = 0x07;
-
- check_bad_ata33:
- if (chip_type >= HPT370A)
- break;
- if (!check_in_drive_list(drive, bad_ata33))
- break;
- /* fall thru */
- case 0x00:
- default:
- mask = 0x00;
- break;
+ break;
+ default:
+ return 0x7f;
}
- return mask;
+
+ return check_in_drive_list(drive, bad_ata33) ? 0x00 : mask;
}
static u32 get_speed_setting(u8 speed, struct hpt_info *info)
@@ -737,7 +723,7 @@ static int hpt366_config_drive_xfer_rate(ide_drive_t *drive)
* This is specific to the HPT366 UDMA chipset
* by HighPoint|Triones Technologies, Inc.
*/
-static int hpt366_ide_dma_lostirq(ide_drive_t *drive)
+static void hpt366_dma_lost_irq(ide_drive_t *drive)
{
struct pci_dev *dev = HWIF(drive)->pci_dev;
u8 mcr1 = 0, mcr3 = 0, scr1 = 0;
@@ -749,7 +735,7 @@ static int hpt366_ide_dma_lostirq(ide_drive_t *drive)
drive->name, __FUNCTION__, mcr1, mcr3, scr1);
if (scr1 & 0x10)
pci_write_config_byte(dev, 0x5a, scr1 & ~0x10);
- return __ide_dma_lostirq(drive);
+ ide_dma_lost_irq(drive);
}
static void hpt370_clear_engine(ide_drive_t *drive)
@@ -799,10 +785,10 @@ static int hpt370_ide_dma_end(ide_drive_t *drive)
return __ide_dma_end(drive);
}
-static int hpt370_ide_dma_timeout(ide_drive_t *drive)
+static void hpt370_dma_timeout(ide_drive_t *drive)
{
hpt370_irq_timeout(drive);
- return __ide_dma_timeout(drive);
+ ide_dma_timeout(drive);
}
/* returns 1 if DMA IRQ issued, 0 otherwise */
@@ -1150,7 +1136,7 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
* Select 66 MHz DPLL clock only if UltraATA/133 mode is
* supported/enabled, use 50 MHz DPLL clock otherwise...
*/
- if (info->max_mode == 0x04) {
+ if (info->max_ultra == 6) {
dpll_clk = 66;
clock = ATA_CLOCK_66MHZ;
} else if (dpll_clk) { /* HPT36x chips don't have DPLL */
@@ -1243,7 +1229,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
struct pci_dev *dev = hwif->pci_dev;
struct hpt_info *info = pci_get_drvdata(dev);
int serialize = HPT_SERIALIZE_IO;
- u8 scr1 = 0, ata66 = (hwif->channel) ? 0x01 : 0x02;
+ u8 scr1 = 0, ata66 = hwif->channel ? 0x01 : 0x02;
u8 chip_type = info->chip_type;
u8 new_mcr, old_mcr = 0;
@@ -1256,7 +1242,9 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
hwif->intrproc = &hpt3xx_intrproc;
hwif->maskproc = &hpt3xx_maskproc;
hwif->busproc = &hpt3xx_busproc;
- hwif->udma_filter = &hpt3xx_udma_filter;
+
+ if (chip_type <= HPT370A)
+ hwif->udma_filter = &hpt3xx_udma_filter;
/*
* HPT3xxN chips have some complications:
@@ -1305,7 +1293,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
return;
}
- hwif->ultra_mask = 0x7f;
+ hwif->ultra_mask = hwif->cds->udma_mask;
hwif->mwdma_mask = 0x07;
/*
@@ -1342,8 +1330,8 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
} else
pci_read_config_byte (dev, 0x5a, &scr1);
- if (!hwif->udma_four)
- hwif->udma_four = (scr1 & ata66) ? 0 : 1;
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = (scr1 & ata66) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
hwif->ide_dma_check = &hpt366_config_drive_xfer_rate;
@@ -1353,9 +1341,9 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
} else if (chip_type >= HPT370) {
hwif->dma_start = &hpt370_ide_dma_start;
hwif->ide_dma_end = &hpt370_ide_dma_end;
- hwif->ide_dma_timeout = &hpt370_ide_dma_timeout;
+ hwif->dma_timeout = &hpt370_dma_timeout;
} else
- hwif->ide_dma_lostirq = &hpt366_ide_dma_lostirq;
+ hwif->dma_lost_irq = &hpt366_dma_lost_irq;
if (!noautodma)
hwif->autodma = 1;
@@ -1503,9 +1491,35 @@ static int __devinit init_setup_hpt366(struct pci_dev *dev, ide_pci_device_t *d)
pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
- if (rev > 6)
+ switch (rev) {
+ case 0:
+ case 1:
+ case 2:
+ /*
+ * HPT36x chips have one channel per function and have
+ * both channel enable bits located differently and visible
+ * to both functions -- really stupid design decision... :-(
+ * Bit 4 is for the primary channel, bit 5 for the secondary.
+ */
+ d->channels = 1;
+ d->enablebits[0].mask = d->enablebits[0].val = 0x10;
+
+ d->udma_mask = HPT366_ALLOW_ATA66_3 ?
+ (HPT366_ALLOW_ATA66_4 ? 0x1f : 0x0f) : 0x07;
+ break;
+ case 3:
+ case 4:
+ d->udma_mask = HPT370_ALLOW_ATA100_5 ? 0x3f : 0x1f;
+ break;
+ default:
rev = 6;
-
+ /* fall thru */
+ case 5:
+ case 6:
+ d->udma_mask = HPT372_ALLOW_ATA133_6 ? 0x7f : 0x3f;
+ break;
+ }
+
d->name = chipset_names[rev];
pci_set_drvdata(dev, info[rev]);
@@ -1513,15 +1527,6 @@ static int __devinit init_setup_hpt366(struct pci_dev *dev, ide_pci_device_t *d)
if (rev > 2)
goto init_single;
- /*
- * HPT36x chips have one channel per function and have
- * both channel enable bits located differently and visible
- * to both functions -- really stupid design decision... :-(
- * Bit 4 is for the primary channel, bit 5 for the secondary.
- */
- d->channels = 1;
- d->enablebits[0].mask = d->enablebits[0].val = 0x10;
-
if ((dev2 = pci_get_slot(dev->bus, dev->devfn + 1)) != NULL) {
u8 mcr1 = 0, pin1 = 0, pin2 = 0;
int ret;
@@ -1573,6 +1578,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
.channels = 2,
.autodma = AUTODMA,
.enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
+ .udma_mask = HPT372_ALLOW_ATA133_6 ? 0x7f : 0x3f,
.bootable = OFF_BOARD,
.extra = 240
},{ /* 2 */
@@ -1584,6 +1590,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
.channels = 2,
.autodma = AUTODMA,
.enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
+ .udma_mask = HPT302_ALLOW_ATA133_6 ? 0x7f : 0x3f,
.bootable = OFF_BOARD,
.extra = 240
},{ /* 3 */
@@ -1595,6 +1602,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
.channels = 2,
.autodma = AUTODMA,
.enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
+ .udma_mask = HPT371_ALLOW_ATA133_6 ? 0x7f : 0x3f,
.bootable = OFF_BOARD,
.extra = 240
},{ /* 4 */
@@ -1606,6 +1614,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
.channels = 2, /* 4 */
.autodma = AUTODMA,
.enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
+ .udma_mask = 0x3f,
.bootable = OFF_BOARD,
.extra = 240
},{ /* 5 */
@@ -1617,6 +1626,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
.channels = 2, /* 4 */
.autodma = AUTODMA,
.enablebits = {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
+ .udma_mask = HPT372_ALLOW_ATA133_6 ? 0x7f : 0x3f,
.bootable = OFF_BOARD,
.extra = 240
}
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index c04a02687b9..ff48c23e571 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -231,7 +231,7 @@ static int it8213_config_drive_for_dma (ide_drive_t *drive)
static void __devinit init_hwif_it8213(ide_hwif_t *hwif)
{
- u8 reg42h = 0, ata66 = 0;
+ u8 reg42h = 0;
hwif->speedproc = &it8213_tune_chipset;
hwif->tuneproc = &it8213_tuneproc;
@@ -250,11 +250,11 @@ static void __devinit init_hwif_it8213(ide_hwif_t *hwif)
hwif->swdma_mask = 0x04;
pci_read_config_byte(hwif->pci_dev, 0x42, &reg42h);
- ata66 = (reg42h & 0x02) ? 0 : 1;
hwif->ide_dma_check = &it8213_config_drive_for_dma;
- if (!(hwif->udma_four))
- hwif->udma_four = ata66;
+
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = (reg42h & 0x02) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
/*
* The BIOS often doesn't set up DMA on this controller
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index 3aeb7f1b791..8197b653ba1 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -491,10 +491,10 @@ static int it821x_config_drive_for_dma (ide_drive_t *drive)
* the needed logic onboard.
*/
-static unsigned int __devinit ata66_it821x(ide_hwif_t *hwif)
+static u8 __devinit ata66_it821x(ide_hwif_t *hwif)
{
/* The reference driver also only does disk side */
- return 1;
+ return ATA_CBL_PATA80;
}
/**
@@ -662,8 +662,9 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
hwif->mwdma_mask = 0x07;
hwif->ide_dma_check = &it821x_config_drive_for_dma;
- if (!(hwif->udma_four))
- hwif->udma_four = ata66_it821x(hwif);
+
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = ata66_it821x(hwif);
/*
* The BIOS often doesn't set up DMA on this controller
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index 76ed2514722..a6008f63e71 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -25,10 +25,10 @@ typedef enum {
* ata66_jmicron - Cable check
* @hwif: IDE port
*
- * Return 1 if the cable is 80pin
+ * Returns the cable type.
*/
-static int __devinit ata66_jmicron(ide_hwif_t *hwif)
+static u8 __devinit ata66_jmicron(ide_hwif_t *hwif)
{
struct pci_dev *pdev = hwif->pci_dev;
@@ -70,16 +70,17 @@ static int __devinit ata66_jmicron(ide_hwif_t *hwif)
{
case PORT_PATA0:
if (control & (1 << 3)) /* 40/80 pin primary */
- return 0;
- return 1;
+ return ATA_CBL_PATA40;
+ return ATA_CBL_PATA80;
case PORT_PATA1:
if (control5 & (1 << 19)) /* 40/80 pin secondary */
- return 0;
- return 1;
+ return ATA_CBL_PATA40;
+ return ATA_CBL_PATA80;
case PORT_SATA:
break;
}
- return 1; /* Avoid bogus "control reaches end of non-void function" */
+ /* Avoid bogus "control reaches end of non-void function" */
+ return ATA_CBL_PATA80;
}
static void jmicron_tuneproc (ide_drive_t *drive, byte mode_wanted)
@@ -159,8 +160,9 @@ static void __devinit init_hwif_jmicron(ide_hwif_t *hwif)
hwif->mwdma_mask = 0x07;
hwif->ide_dma_check = &jmicron_config_drive_for_dma;
- if (!(hwif->udma_four))
- hwif->udma_four = ata66_jmicron(hwif);
+
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = ata66_jmicron(hwif);
hwif->autodma = 1;
hwif->drives[0].autodma = hwif->autodma;
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index 0765dce6948..ee5020df005 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -225,7 +225,10 @@ static void pdcnew_tune_drive(ide_drive_t *drive, u8 pio)
static u8 pdcnew_cable_detect(ide_hwif_t *hwif)
{
- return get_indexed_reg(hwif, 0x0b) & 0x04;
+ if (get_indexed_reg(hwif, 0x0b) & 0x04)
+ return ATA_CBL_PATA40;
+ else
+ return ATA_CBL_PATA80;
}
static int pdcnew_config_drive_xfer_rate(ide_drive_t *drive)
@@ -509,8 +512,8 @@ static void __devinit init_hwif_pdc202new(ide_hwif_t *hwif)
hwif->ide_dma_check = &pdcnew_config_drive_xfer_rate;
- if (!hwif->udma_four)
- hwif->udma_four = pdcnew_cable_detect(hwif) ? 0 : 1;
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = pdcnew_cable_detect(hwif);
if (!noautodma)
hwif->autodma = 1;
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index 23844687dee..41ac4a94959 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -152,8 +152,10 @@ static void pdc202xx_tune_drive(ide_drive_t *drive, u8 pio)
static u8 pdc202xx_old_cable_detect (ide_hwif_t *hwif)
{
u16 CIS = 0, mask = (hwif->channel) ? (1<<11) : (1<<10);
+
pci_read_config_word(hwif->pci_dev, 0x50, &CIS);
- return (CIS & mask) ? 1 : 0;
+
+ return (CIS & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
}
/*
@@ -267,18 +269,24 @@ somebody_else:
return (dma_stat & 4) == 4; /* return 1 if INTR asserted */
}
-static int pdc202xx_ide_dma_lostirq(ide_drive_t *drive)
+static void pdc202xx_dma_lost_irq(ide_drive_t *drive)
{
- if (HWIF(drive)->resetproc != NULL)
- HWIF(drive)->resetproc(drive);
- return __ide_dma_lostirq(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (hwif->resetproc != NULL)
+ hwif->resetproc(drive);
+
+ ide_dma_lost_irq(drive);
}
-static int pdc202xx_ide_dma_timeout(ide_drive_t *drive)
+static void pdc202xx_dma_timeout(ide_drive_t *drive)
{
- if (HWIF(drive)->resetproc != NULL)
- HWIF(drive)->resetproc(drive);
- return __ide_dma_timeout(drive);
+ ide_hwif_t *hwif = HWIF(drive);
+
+ if (hwif->resetproc != NULL)
+ hwif->resetproc(drive);
+
+ ide_dma_timeout(drive);
}
static void pdc202xx_reset_host (ide_hwif_t *hwif)
@@ -347,12 +355,13 @@ static void __devinit init_hwif_pdc202xx(ide_hwif_t *hwif)
hwif->err_stops_fifo = 1;
hwif->ide_dma_check = &pdc202xx_config_drive_xfer_rate;
- hwif->ide_dma_lostirq = &pdc202xx_ide_dma_lostirq;
- hwif->ide_dma_timeout = &pdc202xx_ide_dma_timeout;
+ hwif->dma_lost_irq = &pdc202xx_dma_lost_irq;
+ hwif->dma_timeout = &pdc202xx_dma_timeout;
if (hwif->pci_dev->device != PCI_DEVICE_ID_PROMISE_20246) {
- if (!(hwif->udma_four))
- hwif->udma_four = (pdc202xx_old_cable_detect(hwif)) ? 0 : 1;
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = pdc202xx_old_cable_detect(hwif);
+
hwif->dma_start = &pdc202xx_old_ide_dma_start;
hwif->ide_dma_end = &pdc202xx_old_ide_dma_end;
}
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index 8b219dd6302..2e0b29ef596 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/ide/pci/piix.c Version 0.47 February 8, 2007
+ * linux/drivers/ide/pci/piix.c Version 0.50 Jun 10, 2007
*
* Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
* Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
@@ -394,14 +394,45 @@ static void piix_dma_clear_irq(ide_drive_t *drive)
hwif->OUTB(dma_stat, hwif->dma_status);
}
-static int __devinit piix_cable_detect(ide_hwif_t *hwif)
+struct ich_laptop {
+ u16 device;
+ u16 subvendor;
+ u16 subdevice;
+};
+
+/*
+ * List of laptops that use short cables rather than 80 wire
+ */
+
+static const struct ich_laptop ich_laptop[] = {
+ /* devid, subvendor, subdev */
+ { 0x27DF, 0x0005, 0x0280 }, /* ICH7 on Acer 5602WLMi */
+ { 0x27DF, 0x1025, 0x0110 }, /* ICH7 on Acer 3682WLMi */
+ { 0x27DF, 0x1043, 0x1267 }, /* ICH7 on Asus W5F */
+ { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on Acer Aspire 2023WLMi */
+ /* end marker */
+ { 0, }
+};
+
+static u8 __devinit piix_cable_detect(ide_hwif_t *hwif)
{
- struct pci_dev *dev = hwif->pci_dev;
+ struct pci_dev *pdev = hwif->pci_dev;
+ const struct ich_laptop *lap = &ich_laptop[0];
u8 reg54h = 0, mask = hwif->channel ? 0xc0 : 0x30;
- pci_read_config_byte(dev, 0x54, &reg54h);
+ /* check for specials */
+ while (lap->device) {
+ if (lap->device == pdev->device &&
+ lap->subvendor == pdev->subsystem_vendor &&
+ lap->subdevice == pdev->subsystem_device) {
+ return ATA_CBL_PATA40_SHORT;
+ }
+ lap++;
+ }
+
+ pci_read_config_byte(pdev, 0x54, &reg54h);
- return (reg54h & mask) ? 1 : 0;
+ return (reg54h & mask) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
}
/**
@@ -444,8 +475,8 @@ static void __devinit init_hwif_piix(ide_hwif_t *hwif)
hwif->swdma_mask = 0x04;
if (hwif->ultra_mask & 0x78) {
- if (!hwif->udma_four)
- hwif->udma_four = piix_cable_detect(hwif);
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = piix_cable_detect(hwif);
}
if (no_piix_dma)
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index 55bc0a32e34..7b87488e3da 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -716,7 +716,7 @@ static void __devinit init_hwif_scc(ide_hwif_t *hwif)
hwif->atapi_dma = 1;
/* we support 80c cable only. */
- hwif->udma_four = 1;
+ hwif->cbl = ATA_CBL_PATA80;
hwif->autodma = 0;
if (!noautodma)
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index d9c4fd1ae99..1371b5bf6bf 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/ide/pci/serverworks.c Version 0.11 Jun 2 2007
+ * linux/drivers/ide/pci/serverworks.c Version 0.20 Jun 3 2007
*
* Copyright (C) 1998-2000 Michel Aubry
* Copyright (C) 1998-2000 Andrzej Krzysztofowicz
@@ -151,84 +151,11 @@ static int svwks_tune_chipset (ide_drive_t *drive, u8 xferspeed)
if(dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4 &&
drive->media == ide_disk && speed >= XFER_UDMA_0)
BUG();
-
- pci_read_config_byte(dev, drive_pci[drive->dn], &pio_timing);
- pci_read_config_byte(dev, drive_pci2[drive->dn], &dma_timing);
+
pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing);
pci_read_config_word(dev, 0x4A, &csb5_pio);
pci_read_config_byte(dev, 0x54, &ultra_enable);
- /* If we are in RAID mode (eg AMI MegaIDE) then we can't it
- turns out trust the firmware configuration */
-
- if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
- goto oem_setup_failed;
-
- /* Per Specified Design by OEM, and ASIC Architect */
- if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) ||
- (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) {
- if (!drive->init_speed) {
- u8 dma_stat = inb(hwif->dma_status);
-
- if (((ultra_enable << (7-drive->dn) & 0x80) == 0x80) &&
- ((dma_stat & (1<<(5+unit))) == (1<<(5+unit)))) {
- drive->current_speed = drive->init_speed = XFER_UDMA_0 + udma_modes[(ultra_timing >> (4*unit)) & ~(0xF0)];
- return 0;
- } else if ((dma_timing) &&
- ((dma_stat&(1<<(5+unit)))==(1<<(5+unit)))) {
- u8 dmaspeed;
-
- switch (dma_timing & 0x77) {
- case 0x20:
- dmaspeed = XFER_MW_DMA_2;
- break;
- case 0x21:
- dmaspeed = XFER_MW_DMA_1;
- break;
- case 0x77:
- dmaspeed = XFER_MW_DMA_0;
- break;
- default:
- goto dma_pio;
- }
-
- drive->current_speed = drive->init_speed = dmaspeed;
- return 0;
- }
-dma_pio:
- if (pio_timing) {
- u8 piospeed;
-
- switch (pio_timing & 0x7f) {
- case 0x20:
- piospeed = XFER_PIO_4;
- break;
- case 0x22:
- piospeed = XFER_PIO_3;
- break;
- case 0x34:
- piospeed = XFER_PIO_2;
- break;
- case 0x47:
- piospeed = XFER_PIO_1;
- break;
- case 0x5d:
- piospeed = XFER_PIO_0;
- break;
- default:
- goto oem_setup_failed;
- }
-
- drive->current_speed = drive->init_speed = piospeed;
- return 0;
- }
- }
- }
-
-oem_setup_failed:
-
- pio_timing = 0;
- dma_timing = 0;
ultra_timing &= ~(0x0F << (4*unit));
ultra_enable &= ~(0x01 << drive->dn);
csb5_pio &= ~(0x0F << (4*drive->dn));
@@ -402,9 +329,9 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha
return dev->irq;
}
-static unsigned int __devinit ata66_svwks_svwks (ide_hwif_t *hwif)
+static u8 __devinit ata66_svwks_svwks(ide_hwif_t *hwif)
{
- return 1;
+ return ATA_CBL_PATA80;
}
/* On Dell PowerEdge servers with a CSB5/CSB6, the top two bits
@@ -414,7 +341,7 @@ static unsigned int __devinit ata66_svwks_svwks (ide_hwif_t *hwif)
* Bit 14 clear = primary IDE channel does not have 80-pin cable.
* Bit 14 set = primary IDE channel has 80-pin cable.
*/
-static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif)
+static u8 __devinit ata66_svwks_dell(ide_hwif_t *hwif)
{
struct pci_dev *dev = hwif->pci_dev;
if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
@@ -422,8 +349,8 @@ static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif)
(dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE ||
dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE))
return ((1 << (hwif->channel + 14)) &
- dev->subsystem_device) ? 1 : 0;
- return 0;
+ dev->subsystem_device) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
+ return ATA_CBL_PATA40;
}
/* Sun Cobalt Alpine hardware avoids the 80-pin cable
@@ -432,18 +359,18 @@ static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif)
*
* WARNING: this only works on Alpine hardware!
*/
-static unsigned int __devinit ata66_svwks_cobalt (ide_hwif_t *hwif)
+static u8 __devinit ata66_svwks_cobalt(ide_hwif_t *hwif)
{
struct pci_dev *dev = hwif->pci_dev;
if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN &&
dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE)
return ((1 << (hwif->channel + 14)) &
- dev->subsystem_device) ? 1 : 0;
- return 0;
+ dev->subsystem_device) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
+ return ATA_CBL_PATA40;
}
-static unsigned int __devinit ata66_svwks (ide_hwif_t *hwif)
+static u8 __devinit ata66_svwks(ide_hwif_t *hwif)
{
struct pci_dev *dev = hwif->pci_dev;
@@ -462,9 +389,9 @@ static unsigned int __devinit ata66_svwks (ide_hwif_t *hwif)
/* Per Specified Design by OEM, and ASIC Architect */
if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) ||
(dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2))
- return 1;
+ return ATA_CBL_PATA80;
- return 0;
+ return ATA_CBL_PATA40;
}
static void __devinit init_hwif_svwks (ide_hwif_t *hwif)
@@ -495,8 +422,8 @@ static void __devinit init_hwif_svwks (ide_hwif_t *hwif)
hwif->ide_dma_check = &svwks_config_drive_xfer_rate;
if (hwif->pci_dev->device != PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) {
- if (!hwif->udma_four)
- hwif->udma_four = ata66_svwks(hwif);
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = ata66_svwks(hwif);
}
if (!noautodma)
hwif->autodma = 1;
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index d3185e29a38..d396b2929ed 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -316,14 +316,6 @@ static void sgiioc4_dma_host_off(ide_drive_t * drive)
sgiioc4_clearirq(drive);
}
-static int
-sgiioc4_ide_dma_lostirq(ide_drive_t * drive)
-{
- HWIF(drive)->resetproc(drive);
-
- return __ide_dma_lostirq(drive);
-}
-
static void
sgiioc4_resetproc(ide_drive_t * drive)
{
@@ -331,6 +323,14 @@ sgiioc4_resetproc(ide_drive_t * drive)
sgiioc4_clearirq(drive);
}
+static void
+sgiioc4_dma_lost_irq(ide_drive_t * drive)
+{
+ sgiioc4_resetproc(drive);
+
+ ide_dma_lost_irq(drive);
+}
+
static u8
sgiioc4_INB(unsigned long port)
{
@@ -607,8 +607,8 @@ ide_init_sgiioc4(ide_hwif_t * hwif)
hwif->ide_dma_test_irq = &sgiioc4_ide_dma_test_irq;
hwif->dma_host_on = &sgiioc4_dma_host_on;
hwif->dma_host_off = &sgiioc4_dma_host_off;
- hwif->ide_dma_lostirq = &sgiioc4_ide_dma_lostirq;
- hwif->ide_dma_timeout = &__ide_dma_timeout;
+ hwif->dma_lost_irq = &sgiioc4_dma_lost_irq;
+ hwif->dma_timeout = &ide_dma_timeout;
hwif->INB = &sgiioc4_INB;
}
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 1a4444e7226..1c3e3548789 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -933,16 +933,17 @@ static void __devinit init_iops_siimage(ide_hwif_t *hwif)
* interface.
*/
-static unsigned int __devinit ata66_siimage(ide_hwif_t *hwif)
+static u8 __devinit ata66_siimage(ide_hwif_t *hwif)
{
unsigned long addr = siimage_selreg(hwif, 0);
- if (pci_get_drvdata(hwif->pci_dev) == NULL) {
- u8 ata66 = 0;
+ u8 ata66 = 0;
+
+ if (pci_get_drvdata(hwif->pci_dev) == NULL)
pci_read_config_byte(hwif->pci_dev, addr, &ata66);
- return (ata66 & 0x01) ? 1 : 0;
- }
+ else
+ ata66 = hwif->INB(addr);
- return (hwif->INB(addr) & 0x01) ? 1 : 0;
+ return (ata66 & 0x01) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
}
/**
@@ -988,8 +989,9 @@ static void __devinit init_hwif_siimage(ide_hwif_t *hwif)
hwif->atapi_dma = 1;
hwif->ide_dma_check = &siimage_config_drive_for_dma;
- if (!(hwif->udma_four))
- hwif->udma_four = ata66_siimage(hwif);
+
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = ata66_siimage(hwif);
if (hwif->mmio) {
hwif->ide_dma_test_irq = &siimage_mmio_ide_dma_test_irq;
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index ec0adad9ef6..f875183ac8d 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/ide/pci/sis5513.c Version 0.20 Mar 4, 2007
+ * linux/drivers/ide/pci/sis5513.c Version 0.25 Jun 10, 2007
*
* Copyright (C) 1999-2000 Andre Hedrick <andre@linux-ide.org>
* Copyright (C) 2002 Lionel Bouton <Lionel.Bouton@inet6.fr>, Maintainer
@@ -796,10 +796,33 @@ static unsigned int __devinit init_chipset_sis5513 (struct pci_dev *dev, const c
return 0;
}
-static unsigned int __devinit ata66_sis5513 (ide_hwif_t *hwif)
+struct sis_laptop {
+ u16 device;
+ u16 subvendor;
+ u16 subdevice;
+};
+
+static const struct sis_laptop sis_laptop[] = {
+ /* devid, subvendor, subdev */
+ { 0x5513, 0x1043, 0x1107 }, /* ASUS A6K */
+ /* end marker */
+ { 0, }
+};
+
+static u8 __devinit ata66_sis5513(ide_hwif_t *hwif)
{
+ struct pci_dev *pdev = hwif->pci_dev;
+ const struct sis_laptop *lap = &sis_laptop[0];
u8 ata66 = 0;
+ while (lap->device) {
+ if (lap->device == pdev->device &&
+ lap->subvendor == pdev->subsystem_vendor &&
+ lap->subdevice == pdev->subsystem_device)
+ return ATA_CBL_PATA40_SHORT;
+ lap++;
+ }
+
if (chipset_family >= ATA_133) {
u16 regw = 0;
u16 reg_addr = hwif->channel ? 0x52: 0x50;
@@ -811,7 +834,8 @@ static unsigned int __devinit ata66_sis5513 (ide_hwif_t *hwif)
pci_read_config_byte(hwif->pci_dev, 0x48, &reg48h);
ata66 = (reg48h & mask) ? 0 : 1;
}
- return ata66;
+
+ return ata66 ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
}
static void __devinit init_hwif_sis5513 (ide_hwif_t *hwif)
@@ -841,8 +865,8 @@ static void __devinit init_hwif_sis5513 (ide_hwif_t *hwif)
if (!chipset_family)
return;
- if (!(hwif->udma_four))
- hwif->udma_four = ata66_sis5513(hwif);
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = ata66_sis5513(hwif);
if (chipset_family > ATA_16) {
hwif->ide_dma_check = &sis5513_config_xfer_rate;
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index 7c383d9cc47..487879842af 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -195,7 +195,7 @@ static inline void sl82c105_reset_host(struct pci_dev *dev)
* This function is called when the IDE timer expires, the drive
* indicates that it is READY, and we were waiting for DMA to complete.
*/
-static int sl82c105_ide_dma_lostirq(ide_drive_t *drive)
+static void sl82c105_dma_lost_irq(ide_drive_t *drive)
{
ide_hwif_t *hwif = HWIF(drive);
struct pci_dev *dev = hwif->pci_dev;
@@ -222,9 +222,6 @@ static int sl82c105_ide_dma_lostirq(ide_drive_t *drive)
}
sl82c105_reset_host(dev);
-
- /* __ide_dma_lostirq would return 1, so we do as well */
- return 1;
}
/*
@@ -244,15 +241,12 @@ static void sl82c105_dma_start(ide_drive_t *drive)
ide_dma_start(drive);
}
-static int sl82c105_ide_dma_timeout(ide_drive_t *drive)
+static void sl82c105_dma_timeout(ide_drive_t *drive)
{
- ide_hwif_t *hwif = HWIF(drive);
- struct pci_dev *dev = hwif->pci_dev;
+ DBG(("sl82c105_dma_timeout(drive:%s)\n", drive->name));
- DBG(("sl82c105_ide_dma_timeout(drive:%s)\n", drive->name));
-
- sl82c105_reset_host(dev);
- return __ide_dma_timeout(drive);
+ sl82c105_reset_host(HWIF(drive)->pci_dev);
+ ide_dma_timeout(drive);
}
static int sl82c105_ide_dma_on(ide_drive_t *drive)
@@ -441,9 +435,9 @@ static void __devinit init_hwif_sl82c105(ide_hwif_t *hwif)
hwif->ide_dma_check = &sl82c105_ide_dma_check;
hwif->ide_dma_on = &sl82c105_ide_dma_on;
hwif->dma_off_quietly = &sl82c105_dma_off_quietly;
- hwif->ide_dma_lostirq = &sl82c105_ide_dma_lostirq;
+ hwif->dma_lost_irq = &sl82c105_dma_lost_irq;
hwif->dma_start = &sl82c105_dma_start;
- hwif->ide_dma_timeout = &sl82c105_ide_dma_timeout;
+ hwif->dma_timeout = &sl82c105_dma_timeout;
if (!noautodma)
hwif->autodma = 1;
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index c40f291f91e..575dbbd8b48 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -199,10 +199,9 @@ static void __devinit init_hwif_slc90e66 (ide_hwif_t *hwif)
hwif->mwdma_mask = 0x06;
hwif->swdma_mask = 0x04;
- if (!hwif->udma_four) {
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
/* bit[0(1)]: 0:80, 1:40 */
- hwif->udma_four = (reg47 & mask) ? 0 : 1;
- }
+ hwif->cbl = (reg47 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
hwif->ide_dma_check = &slc90e66_config_drive_xfer_rate;
diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index cee619bb2ea..8de1f8e2249 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -220,13 +220,13 @@ static void __devinit init_hwif_tc86c001(ide_hwif_t *hwif)
hwif->ide_dma_check = &tc86c001_config_drive_xfer_rate;
hwif->dma_start = &tc86c001_dma_start;
- if (!hwif->udma_four) {
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT) {
/*
* System Control 1 Register bit 13 (PDIAGN):
* 0=80-pin cable, 1=40-pin cable
*/
scr1 = hwif->INW(sc_base + 0x00);
- hwif->udma_four = (scr1 & 0x2000) ? 0 : 1;
+ hwif->cbl = (scr1 & 0x2000) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
}
if (!noautodma)
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index a508550c409..d21dd2e7eeb 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -1,6 +1,6 @@
/*
*
- * Version 3.38
+ * Version 3.45
*
* VIA IDE driver for Linux. Supported southbridges:
*
@@ -9,6 +9,7 @@
* vt8235, vt8237, vt8237a
*
* Copyright (c) 2000-2002 Vojtech Pavlik
+ * Copyright (c) 2007 Bartlomiej Zolnierkiewicz
*
* Based on the work of:
* Michel Aubry
@@ -33,6 +34,8 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/ide.h>
+#include <linux/dmi.h>
+
#include <asm/io.h>
#ifdef CONFIG_PPC_CHRP
@@ -41,8 +44,6 @@
#include "ide-timing.h"
-#define DISPLAY_VIA_TIMINGS
-
#define VIA_IDE_ENABLE 0x40
#define VIA_IDE_CONFIG 0x41
#define VIA_FIFO_CONFIG 0x43
@@ -54,18 +55,12 @@
#define VIA_ADDRESS_SETUP 0x4c
#define VIA_UDMA_TIMING 0x50
-#define VIA_UDMA 0x007
-#define VIA_UDMA_NONE 0x000
-#define VIA_UDMA_33 0x001
-#define VIA_UDMA_66 0x002
-#define VIA_UDMA_100 0x003
-#define VIA_UDMA_133 0x004
-#define VIA_BAD_PREQ 0x010 /* Crashes if PREQ# till DDACK# set */
-#define VIA_BAD_CLK66 0x020 /* 66 MHz clock doesn't work correctly */
-#define VIA_SET_FIFO 0x040 /* Needs to have FIFO split set */
-#define VIA_NO_UNMASK 0x080 /* Doesn't work with IRQ unmasking on */
-#define VIA_BAD_ID 0x100 /* Has wrong vendor ID (0x1107) */
-#define VIA_BAD_AST 0x200 /* Don't touch Address Setup Timing */
+#define VIA_BAD_PREQ 0x01 /* Crashes if PREQ# till DDACK# set */
+#define VIA_BAD_CLK66 0x02 /* 66 MHz clock doesn't work correctly */
+#define VIA_SET_FIFO 0x04 /* Needs to have FIFO split set */
+#define VIA_NO_UNMASK 0x08 /* Doesn't work with IRQ unmasking on */
+#define VIA_BAD_ID 0x10 /* Has wrong vendor ID (0x1107) */
+#define VIA_BAD_AST 0x20 /* Don't touch Address Setup Timing */
/*
* VIA SouthBridge chips.
@@ -76,36 +71,37 @@ static struct via_isa_bridge {
u16 id;
u8 rev_min;
u8 rev_max;
- u16 flags;
+ u8 udma_mask;
+ u8 flags;
} via_isa_bridges[] = {
- { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt8233a", PCI_DEVICE_ID_VIA_8233A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
- { "vt8233c", PCI_DEVICE_ID_VIA_8233C_0, 0x00, 0x2f, VIA_UDMA_100 },
- { "vt8233", PCI_DEVICE_ID_VIA_8233_0, 0x00, 0x2f, VIA_UDMA_100 },
- { "vt8231", PCI_DEVICE_ID_VIA_8231, 0x00, 0x2f, VIA_UDMA_100 },
- { "vt82c686b", PCI_DEVICE_ID_VIA_82C686, 0x40, 0x4f, VIA_UDMA_100 },
- { "vt82c686a", PCI_DEVICE_ID_VIA_82C686, 0x10, 0x2f, VIA_UDMA_66 },
- { "vt82c686", PCI_DEVICE_ID_VIA_82C686, 0x00, 0x0f, VIA_UDMA_33 | VIA_BAD_CLK66 },
- { "vt82c596b", PCI_DEVICE_ID_VIA_82C596, 0x10, 0x2f, VIA_UDMA_66 },
- { "vt82c596a", PCI_DEVICE_ID_VIA_82C596, 0x00, 0x0f, VIA_UDMA_33 | VIA_BAD_CLK66 },
- { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x47, 0x4f, VIA_UDMA_33 | VIA_SET_FIFO },
- { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x40, 0x46, VIA_UDMA_33 | VIA_SET_FIFO | VIA_BAD_PREQ },
- { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x30, 0x3f, VIA_UDMA_33 | VIA_SET_FIFO },
- { "vt82c586a", PCI_DEVICE_ID_VIA_82C586_0, 0x20, 0x2f, VIA_UDMA_33 | VIA_SET_FIFO },
- { "vt82c586", PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f, VIA_UDMA_NONE | VIA_SET_FIFO },
- { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK },
- { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, VIA_UDMA_NONE | VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID },
+ { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt8233a", PCI_DEVICE_ID_VIA_8233A, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+ { "vt8233c", PCI_DEVICE_ID_VIA_8233C_0, 0x00, 0x2f, ATA_UDMA5, },
+ { "vt8233", PCI_DEVICE_ID_VIA_8233_0, 0x00, 0x2f, ATA_UDMA5, },
+ { "vt8231", PCI_DEVICE_ID_VIA_8231, 0x00, 0x2f, ATA_UDMA5, },
+ { "vt82c686b", PCI_DEVICE_ID_VIA_82C686, 0x40, 0x4f, ATA_UDMA5, },
+ { "vt82c686a", PCI_DEVICE_ID_VIA_82C686, 0x10, 0x2f, ATA_UDMA4, },
+ { "vt82c686", PCI_DEVICE_ID_VIA_82C686, 0x00, 0x0f, ATA_UDMA2, VIA_BAD_CLK66 },
+ { "vt82c596b", PCI_DEVICE_ID_VIA_82C596, 0x10, 0x2f, ATA_UDMA4, },
+ { "vt82c596a", PCI_DEVICE_ID_VIA_82C596, 0x00, 0x0f, ATA_UDMA2, VIA_BAD_CLK66 },
+ { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x47, 0x4f, ATA_UDMA2, VIA_SET_FIFO },
+ { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x40, 0x46, ATA_UDMA2, VIA_SET_FIFO | VIA_BAD_PREQ },
+ { "vt82c586b", PCI_DEVICE_ID_VIA_82C586_0, 0x30, 0x3f, ATA_UDMA2, VIA_SET_FIFO },
+ { "vt82c586a", PCI_DEVICE_ID_VIA_82C586_0, 0x20, 0x2f, ATA_UDMA2, VIA_SET_FIFO },
+ { "vt82c586", PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f, 0x00, VIA_SET_FIFO },
+ { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, 0x00, VIA_SET_FIFO | VIA_NO_UNMASK },
+ { "vt82c576", PCI_DEVICE_ID_VIA_82C576, 0x00, 0x2f, 0x00, VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID },
{ NULL }
};
static unsigned int via_clock;
-static char *via_dma[] = { "MWDMA16", "UDMA33", "UDMA66", "UDMA100", "UDMA133" };
+static char *via_dma[] = { "16", "25", "33", "44", "66", "100", "133" };
struct via82cxxx_dev
{
@@ -140,12 +136,12 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
pci_write_config_byte(dev, VIA_DRIVE_TIMING + (3 - dn),
((FIT(timing->active, 1, 16) - 1) << 4) | (FIT(timing->recover, 1, 16) - 1));
- switch (vdev->via_config->flags & VIA_UDMA) {
- case VIA_UDMA_33: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break;
- case VIA_UDMA_66: t = timing->udma ? (0xe8 | (FIT(timing->udma, 2, 9) - 2)) : 0x0f; break;
- case VIA_UDMA_100: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break;
- case VIA_UDMA_133: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break;
- default: return;
+ switch (vdev->via_config->udma_mask) {
+ case ATA_UDMA2: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 5) - 2)) : 0x03; break;
+ case ATA_UDMA4: t = timing->udma ? (0xe8 | (FIT(timing->udma, 2, 9) - 2)) : 0x0f; break;
+ case ATA_UDMA5: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break;
+ case ATA_UDMA6: t = timing->udma ? (0xe0 | (FIT(timing->udma, 2, 9) - 2)) : 0x07; break;
+ default: return;
}
pci_write_config_byte(dev, VIA_UDMA_TIMING + (3 - dn), t);
@@ -173,12 +169,12 @@ static int via_set_drive(ide_drive_t *drive, u8 speed)
T = 1000000000 / via_clock;
- switch (vdev->via_config->flags & VIA_UDMA) {
- case VIA_UDMA_33: UT = T; break;
- case VIA_UDMA_66: UT = T/2; break;
- case VIA_UDMA_100: UT = T/3; break;
- case VIA_UDMA_133: UT = T/4; break;
- default: UT = T;
+ switch (vdev->via_config->udma_mask) {
+ case ATA_UDMA2: UT = T; break;
+ case ATA_UDMA4: UT = T/2; break;
+ case ATA_UDMA5: UT = T/3; break;
+ case ATA_UDMA6: UT = T/4; break;
+ default: UT = T;
}
ide_timing_compute(drive, speed, &t, T, UT);
@@ -208,8 +204,7 @@ static int via_set_drive(ide_drive_t *drive, u8 speed)
static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio)
{
if (pio == 255) {
- via_set_drive(drive,
- ide_find_best_mode(drive, XFER_PIO | XFER_EPIO));
+ via_set_drive(drive, ide_find_best_pio_mode(drive));
return;
}
@@ -226,16 +221,10 @@ static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio)
static int via82cxxx_ide_dma_check (ide_drive_t *drive)
{
- ide_hwif_t *hwif = HWIF(drive);
- struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
- u16 w80 = hwif->udma_four;
+ u8 speed = ide_max_dma_mode(drive);
- u16 speed = ide_find_best_mode(drive,
- XFER_PIO | XFER_EPIO | XFER_SWDMA | XFER_MWDMA |
- (vdev->via_config->flags & VIA_UDMA ? XFER_UDMA : 0) |
- (w80 && (vdev->via_config->flags & VIA_UDMA) >= VIA_UDMA_66 ? XFER_UDMA_66 : 0) |
- (w80 && (vdev->via_config->flags & VIA_UDMA) >= VIA_UDMA_100 ? XFER_UDMA_100 : 0) |
- (w80 && (vdev->via_config->flags & VIA_UDMA) >= VIA_UDMA_133 ? XFER_UDMA_133 : 0));
+ if (speed == 0)
+ speed = ide_find_best_pio_mode(drive);
via_set_drive(drive, speed);
@@ -272,8 +261,8 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
{
int i;
- switch (vdev->via_config->flags & VIA_UDMA) {
- case VIA_UDMA_66:
+ switch (vdev->via_config->udma_mask) {
+ case ATA_UDMA4:
for (i = 24; i >= 0; i -= 8)
if (((u >> (i & 16)) & 8) &&
((u >> i) & 0x20) &&
@@ -286,7 +275,7 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
}
break;
- case VIA_UDMA_100:
+ case ATA_UDMA5:
for (i = 24; i >= 0; i -= 8)
if (((u >> i) & 0x10) ||
(((u >> i) & 0x20) &&
@@ -298,7 +287,7 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
}
break;
- case VIA_UDMA_133:
+ case ATA_UDMA6:
for (i = 24; i >= 0; i -= 8)
if (((u >> i) & 0x10) ||
(((u >> i) & 0x20) &&
@@ -353,7 +342,7 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
via_cable_detect(vdev, u);
- if ((via_config->flags & VIA_UDMA) == VIA_UDMA_66) {
+ if (via_config->udma_mask == ATA_UDMA4) {
/* Enable Clk66 */
pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008);
} else if (via_config->flags & VIA_BAD_CLK66) {
@@ -416,16 +405,54 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
*/
pci_read_config_byte(isa, PCI_REVISION_ID, &t);
- printk(KERN_INFO "VP_IDE: VIA %s (rev %02x) IDE %s "
+ printk(KERN_INFO "VP_IDE: VIA %s (rev %02x) IDE %sDMA%s "
"controller on pci%s\n",
via_config->name, t,
- via_dma[via_config->flags & VIA_UDMA],
+ via_config->udma_mask ? "U" : "MW",
+ via_dma[via_config->udma_mask ?
+ (fls(via_config->udma_mask) - 1) : 0],
pci_name(dev));
pci_dev_put(isa);
return 0;
}
+/*
+ * Cable special cases
+ */
+
+static struct dmi_system_id cable_dmi_table[] = {
+ {
+ .ident = "Acer Ferrari 3400",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Acer,Inc."),
+ DMI_MATCH(DMI_BOARD_NAME, "Ferrari 3400"),
+ },
+ },
+ { }
+};
+
+static int via_cable_override(void)
+{
+ /* Systems by DMI */
+ if (dmi_check_system(cable_dmi_table))
+ return 1;
+ return 0;
+}
+
+static u8 __devinit via82cxxx_cable_detect(ide_hwif_t *hwif)
+{
+ struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
+
+ if (via_cable_override())
+ return ATA_CBL_PATA40_SHORT;
+
+ if ((vdev->via_80w >> hwif->channel) & 1)
+ return ATA_CBL_PATA80;
+ else
+ return ATA_CBL_PATA40;
+}
+
static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif)
{
struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
@@ -454,12 +481,14 @@ static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif)
return;
hwif->atapi_dma = 1;
- hwif->ultra_mask = 0x7f;
+
+ hwif->ultra_mask = vdev->via_config->udma_mask;
hwif->mwdma_mask = 0x07;
hwif->swdma_mask = 0x07;
- if (!hwif->udma_four)
- hwif->udma_four = (vdev->via_80w >> hwif->channel) & 1;
+ if (hwif->cbl != ATA_CBL_PATA40_SHORT)
+ hwif->cbl = via82cxxx_cable_detect(hwif);
+
hwif->ide_dma_check = &via82cxxx_ide_dma_check;
if (!noautodma)
hwif->autodma = 1;
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index 45fc36f0f21..e46f4720654 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -942,8 +942,8 @@ pmac_ide_tune_chipset (ide_drive_t *drive, byte speed)
return 1;
case XFER_UDMA_4:
case XFER_UDMA_3:
- if (HWIF(drive)->udma_four == 0)
- return 1;
+ if (drive->hwif->cbl != ATA_CBL_PATA80)
+ return 1;
case XFER_UDMA_2:
case XFER_UDMA_1:
case XFER_UDMA_0:
@@ -1244,7 +1244,7 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
hwif->chipset = ide_pmac;
hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET] || pmif->mediabay;
hwif->hold = pmif->mediabay;
- hwif->udma_four = pmif->cable_80;
+ hwif->cbl = pmif->cable_80 ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
hwif->drives[0].unmask = 1;
hwif->drives[1].unmask = 1;
hwif->tuneproc = pmac_ide_tuneproc;
@@ -1821,28 +1821,11 @@ pmac_ide_dma_check(ide_drive_t *drive)
enable = 0;
if (enable) {
- short mode;
-
- map = XFER_MWDMA;
- if (pmif->kind == controller_kl_ata4
- || pmif->kind == controller_un_ata6
- || pmif->kind == controller_k2_ata6
- || pmif->kind == controller_sh_ata6) {
- map |= XFER_UDMA;
- if (pmif->cable_80) {
- map |= XFER_UDMA_66;
- if (pmif->kind == controller_un_ata6 ||
- pmif->kind == controller_k2_ata6 ||
- pmif->kind == controller_sh_ata6)
- map |= XFER_UDMA_100;
- if (pmif->kind == controller_sh_ata6)
- map |= XFER_UDMA_133;
- }
- }
- mode = ide_find_best_mode(drive, map);
- if (mode & XFER_UDMA)
+ u8 mode = ide_max_dma_mode(drive);
+
+ if (mode >= XFER_UDMA_0)
drive->using_dma = pmac_ide_udma_enable(drive, mode);
- else if (mode & XFER_MWDMA)
+ else if (mode >= XFER_MW_DMA_0)
drive->using_dma = pmac_ide_mdma_enable(drive, mode);
hwif->OUTB(0, IDE_CONTROL_REG);
/* Apply settings to controller */
@@ -2004,20 +1987,19 @@ static void pmac_ide_dma_host_on(ide_drive_t *drive)
{
}
-static int
-pmac_ide_dma_lostirq (ide_drive_t *drive)
+static void
+pmac_ide_dma_lost_irq (ide_drive_t *drive)
{
pmac_ide_hwif_t* pmif = (pmac_ide_hwif_t *)HWIF(drive)->hwif_data;
volatile struct dbdma_regs __iomem *dma;
unsigned long status;
if (pmif == NULL)
- return 0;
+ return;
dma = pmif->dma_regs;
status = readl(&dma->status);
printk(KERN_ERR "ide-pmac lost interrupt, dma status: %lx\n", status);
- return 0;
}
/*
@@ -2057,8 +2039,8 @@ pmac_ide_setup_dma(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
hwif->ide_dma_test_irq = &pmac_ide_dma_test_irq;
hwif->dma_host_off = &pmac_ide_dma_host_off;
hwif->dma_host_on = &pmac_ide_dma_host_on;
- hwif->ide_dma_timeout = &__ide_dma_timeout;
- hwif->ide_dma_lostirq = &pmac_ide_dma_lostirq;
+ hwif->dma_timeout = &ide_dma_timeout;
+ hwif->dma_lost_irq = &pmac_ide_dma_lost_irq;
hwif->atapi_dma = 1;
switch(pmif->kind) {
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
index 79494c4f2b1..fa92f7f1d0d 100644
--- a/fs/jfs/endian24.h
+++ b/fs/jfs/endian24.h
@@ -29,7 +29,7 @@
__u32 __x = (x); \
((__u32)( \
((__x & (__u32)0x000000ffUL) << 16) | \
- (__x & (__u32)0x0000ff00UL) | \
+ (__x & (__u32)0x0000ff00UL) | \
((__x & (__u32)0x00ff0000UL) >> 16) )); \
})
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 9c5d59632aa..887f5759e53 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -26,34 +26,6 @@
#include "jfs_filsys.h"
#include "jfs_debug.h"
-#ifdef CONFIG_JFS_DEBUG
-void dump_mem(char *label, void *data, int length)
-{
- int i, j;
- int *intptr = data;
- char *charptr = data;
- char buf[10], line[80];
-
- printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length,
- data);
- for (i = 0; i < length; i += 16) {
- line[0] = 0;
- for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
- sprintf(buf, " %08x", intptr[i / 4 + j]);
- strcat(line, buf);
- }
- buf[0] = ' ';
- buf[2] = 0;
- for (j = 0; (j < 16) && (i + j < length); j++) {
- buf[1] =
- isprint(charptr[i + j]) ? charptr[i + j] : '.';
- strcat(line, buf);
- }
- printk("%s\n", line);
- }
-}
-#endif
-
#ifdef PROC_FS_JFS /* see jfs_debug.h */
static struct proc_dir_entry *base;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index 7378798f0b2..044c1e654cc 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,6 @@ extern void jfs_proc_clean(void);
extern int jfsloglevel;
-extern void dump_mem(char *label, void *data, int length);
extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
/* information message: e.g., configuration, major event */
@@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
* ---------
*/
#else /* CONFIG_JFS_DEBUG */
-#define dump_mem(label,data,length) do {} while (0)
#define ASSERT(p) do {} while (0)
#define jfs_info(fmt, arg...) do {} while (0)
#define jfs_debug(fmt, arg...) do {} while (0)
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 40b20111383..c387540d342 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -19,23 +19,23 @@
#define _H_JFS_DINODE
/*
- * jfs_dinode.h: on-disk inode manager
+ * jfs_dinode.h: on-disk inode manager
*/
-#define INODESLOTSIZE 128
-#define L2INODESLOTSIZE 7
-#define log2INODESIZE 9 /* log2(bytes per dinode) */
+#define INODESLOTSIZE 128
+#define L2INODESLOTSIZE 7
+#define log2INODESIZE 9 /* log2(bytes per dinode) */
/*
- * on-disk inode : 512 bytes
+ * on-disk inode : 512 bytes
*
* note: align 64-bit fields on 8-byte boundary.
*/
struct dinode {
/*
- * I. base area (128 bytes)
- * ------------------------
+ * I. base area (128 bytes)
+ * ------------------------
*
* define generic/POSIX attributes
*/
@@ -70,16 +70,16 @@ struct dinode {
__le32 di_acltype; /* 4: Type of ACL */
/*
- * Extension Areas.
+ * Extension Areas.
*
- * Historically, the inode was partitioned into 4 128-byte areas,
- * the last 3 being defined as unions which could have multiple
- * uses. The first 96 bytes had been completely unused until
- * an index table was added to the directory. It is now more
- * useful to describe the last 3/4 of the inode as a single
- * union. We would probably be better off redesigning the
- * entire structure from scratch, but we don't want to break
- * commonality with OS/2's JFS at this time.
+ * Historically, the inode was partitioned into 4 128-byte areas,
+ * the last 3 being defined as unions which could have multiple
+ * uses. The first 96 bytes had been completely unused until
+ * an index table was added to the directory. It is now more
+ * useful to describe the last 3/4 of the inode as a single
+ * union. We would probably be better off redesigning the
+ * entire structure from scratch, but we don't want to break
+ * commonality with OS/2's JFS at this time.
*/
union {
struct {
@@ -95,7 +95,7 @@ struct dinode {
} _dir; /* (384) */
#define di_dirtable u._dir._table
#define di_dtroot u._dir._dtroot
-#define di_parent di_dtroot.header.idotdot
+#define di_parent di_dtroot.header.idotdot
#define di_DASD di_dtroot.header.DASD
struct {
@@ -127,14 +127,14 @@ struct dinode {
#define di_inlinedata u._file._u2._special._u
#define di_rdev u._file._u2._special._u._rdev
#define di_fastsymlink u._file._u2._special._u._fastsymlink
-#define di_inlineea u._file._u2._special._inlineea
+#define di_inlineea u._file._u2._special._inlineea
} u;
};
/* extended mode bits (on-disk inode di_mode) */
-#define IFJOURNAL 0x00010000 /* journalled file */
-#define ISPARSE 0x00020000 /* sparse file enabled */
-#define INLINEEA 0x00040000 /* inline EA area free */
+#define IFJOURNAL 0x00010000 /* journalled file */
+#define ISPARSE 0x00020000 /* sparse file enabled */
+#define INLINEEA 0x00040000 /* inline EA area free */
#define ISWAPFILE 0x00800000 /* file open for pager swap space */
/* more extended mode bits: attributes for OS/2 */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f3b1ebb2228..e1985066b1c 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -154,12 +154,12 @@ static const s8 budtab[256] = {
* the in-core descriptor is initialized from disk.
*
* PARAMETERS:
- * ipbmap - pointer to in-core inode for the block map.
+ * ipbmap - pointer to in-core inode for the block map.
*
* RETURN VALUES:
- * 0 - success
- * -ENOMEM - insufficient memory
- * -EIO - i/o error
+ * 0 - success
+ * -ENOMEM - insufficient memory
+ * -EIO - i/o error
*/
int dbMount(struct inode *ipbmap)
{
@@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap)
* the memory for this descriptor is freed.
*
* PARAMETERS:
- * ipbmap - pointer to in-core inode for the block map.
+ * ipbmap - pointer to in-core inode for the block map.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * 0 - success
+ * -EIO - i/o error
*/
int dbUnmount(struct inode *ipbmap, int mounterror)
{
@@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap)
* at a time.
*
* PARAMETERS:
- * ip - pointer to in-core inode;
- * blkno - starting block number to be freed.
- * nblocks - number of blocks to be freed.
+ * ip - pointer to in-core inode;
+ * blkno - starting block number to be freed.
+ * nblocks - number of blocks to be freed.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * 0 - success
+ * -EIO - i/o error
*/
int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
{
@@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
/*
* NAME: dbUpdatePMap()
*
- * FUNCTION: update the allocation state (free or allocate) of the
+ * FUNCTION: update the allocation state (free or allocate) of the
* specified block range in the persistent block allocation map.
*
* the blocks will be updated in the persistent map one
* dmap at a time.
*
* PARAMETERS:
- * ipbmap - pointer to in-core inode for the block map.
- * free - 'true' if block range is to be freed from the persistent
- * map; 'false' if it is to be allocated.
- * blkno - starting block number of the range.
- * nblocks - number of contiguous blocks in the range.
- * tblk - transaction block;
+ * ipbmap - pointer to in-core inode for the block map.
+ * free - 'true' if block range is to be freed from the persistent
+ * map; 'false' if it is to be allocated.
+ * blkno - starting block number of the range.
+ * nblocks - number of contiguous blocks in the range.
+ * tblk - transaction block;
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * 0 - success
+ * -EIO - i/o error
*/
int
dbUpdatePMap(struct inode *ipbmap,
@@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap,
/*
* NAME: dbNextAG()
*
- * FUNCTION: find the preferred allocation group for new allocations.
+ * FUNCTION: find the preferred allocation group for new allocations.
*
* Within the allocation groups, we maintain a preferred
* allocation group which consists of a group with at least
@@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap,
* empty ags around for large allocations.
*
* PARAMETERS:
- * ipbmap - pointer to in-core inode for the block map.
+ * ipbmap - pointer to in-core inode for the block map.
*
* RETURN VALUES:
- * the preferred allocation group number.
+ * the preferred allocation group number.
*/
int dbNextAG(struct inode *ipbmap)
{
@@ -656,7 +656,7 @@ unlock:
/*
* NAME: dbAlloc()
*
- * FUNCTION: attempt to allocate a specified number of contiguous free
+ * FUNCTION: attempt to allocate a specified number of contiguous free
* blocks from the working allocation block map.
*
* the block allocation policy uses hints and a multi-step
@@ -680,16 +680,16 @@ unlock:
* size or requests that specify no hint value.
*
* PARAMETERS:
- * ip - pointer to in-core inode;
- * hint - allocation hint.
- * nblocks - number of contiguous blocks in the range.
- * results - on successful return, set to the starting block number
+ * ip - pointer to in-core inode;
+ * hint - allocation hint.
+ * nblocks - number of contiguous blocks in the range.
+ * results - on successful return, set to the starting block number
* of the newly allocated contiguous range.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*/
int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
{
@@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
/* assert that nblocks is valid */
assert(nblocks > 0);
-#ifdef _STILL_TO_PORT
- /* DASD limit check F226941 */
- if (OVER_LIMIT(ip, nblocks))
- return -ENOSPC;
-#endif /* _STILL_TO_PORT */
-
/* get the log2 number of blocks to be allocated.
* if the number of blocks is not a log2 multiple,
* it will be rounded up to the next log2 multiple.
@@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
bmp = JFS_SBI(ip->i_sb)->bmap;
-//retry: /* serialize w.r.t.extendfs() */
mapSize = bmp->db_mapsize;
/* the hint should be within the map */
@@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
/*
* NAME: dbAllocExact()
*
- * FUNCTION: try to allocate the requested extent;
+ * FUNCTION: try to allocate the requested extent;
*
* PARAMETERS:
- * ip - pointer to in-core inode;
- * blkno - extent address;
- * nblocks - extent length;
+ * ip - pointer to in-core inode;
+ * blkno - extent address;
+ * nblocks - extent length;
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*/
int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
{
@@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
/*
* NAME: dbReAlloc()
*
- * FUNCTION: attempt to extend a current allocation by a specified
+ * FUNCTION: attempt to extend a current allocation by a specified
* number of blocks.
*
* this routine attempts to satisfy the allocation request
@@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
* number of blocks required.
*
* PARAMETERS:
- * ip - pointer to in-core inode requiring allocation.
- * blkno - starting block of the current allocation.
- * nblocks - number of contiguous blocks within the current
+ * ip - pointer to in-core inode requiring allocation.
+ * blkno - starting block of the current allocation.
+ * nblocks - number of contiguous blocks within the current
* allocation.
- * addnblocks - number of blocks to add to the allocation.
- * results - on successful return, set to the starting block number
+ * addnblocks - number of blocks to add to the allocation.
+ * results - on successful return, set to the starting block number
* of the existing allocation if the existing allocation
* was extended in place or to a newly allocated contiguous
* range if the existing allocation could not be extended
* in place.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*/
int
dbReAlloc(struct inode *ip,
@@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip,
/*
* NAME: dbExtend()
*
- * FUNCTION: attempt to extend a current allocation by a specified
+ * FUNCTION: attempt to extend a current allocation by a specified
* number of blocks.
*
* this routine attempts to satisfy the allocation request
@@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip,
* immediately following the current allocation.
*
* PARAMETERS:
- * ip - pointer to in-core inode requiring allocation.
- * blkno - starting block of the current allocation.
- * nblocks - number of contiguous blocks within the current
+ * ip - pointer to in-core inode requiring allocation.
+ * blkno - starting block of the current allocation.
+ * nblocks - number of contiguous blocks within the current
* allocation.
- * addnblocks - number of blocks to add to the allocation.
+ * addnblocks - number of blocks to add to the allocation.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*/
static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
{
@@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
/*
* NAME: dbAllocNext()
*
- * FUNCTION: attempt to allocate the blocks of the specified block
+ * FUNCTION: attempt to allocate the blocks of the specified block
* range within a dmap.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * dp - pointer to dmap.
- * blkno - starting block number of the range.
- * nblocks - number of contiguous free blocks of the range.
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap.
+ * blkno - starting block number of the range.
+ * nblocks - number of contiguous free blocks of the range.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*
* serialization: IREAD_LOCK(ipbmap) held on entry/exit;
*/
@@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
/*
* NAME: dbAllocNear()
*
- * FUNCTION: attempt to allocate a number of contiguous free blocks near
+ * FUNCTION: attempt to allocate a number of contiguous free blocks near
* a specified block (hint) within a dmap.
*
* starting with the dmap leaf that covers the hint, we'll
@@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
* the desired free space.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * dp - pointer to dmap.
- * blkno - block number to allocate near.
- * nblocks - actual number of contiguous free blocks desired.
- * l2nb - log2 number of contiguous free blocks desired.
- * results - on successful return, set to the starting block number
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap.
+ * blkno - block number to allocate near.
+ * nblocks - actual number of contiguous free blocks desired.
+ * l2nb - log2 number of contiguous free blocks desired.
+ * results - on successful return, set to the starting block number
* of the newly allocated range.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*
* serialization: IREAD_LOCK(ipbmap) held on entry/exit;
*/
@@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp,
/*
* NAME: dbAllocAG()
*
- * FUNCTION: attempt to allocate the specified number of contiguous
+ * FUNCTION: attempt to allocate the specified number of contiguous
* free blocks within the specified allocation group.
*
* unless the allocation group size is equal to the number
@@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp,
* the allocation group.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
+ * bmp - pointer to bmap descriptor
* agno - allocation group number.
- * nblocks - actual number of contiguous free blocks desired.
- * l2nb - log2 number of contiguous free blocks desired.
- * results - on successful return, set to the starting block number
+ * nblocks - actual number of contiguous free blocks desired.
+ * l2nb - log2 number of contiguous free blocks desired.
+ * results - on successful return, set to the starting block number
* of the newly allocated range.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*
* note: IWRITE_LOCK(ipmap) held on entry/exit;
*/
@@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
/*
* NAME: dbAllocAny()
*
- * FUNCTION: attempt to allocate the specified number of contiguous
+ * FUNCTION: attempt to allocate the specified number of contiguous
* free blocks anywhere in the file system.
*
* dbAllocAny() attempts to find the sufficient free space by
@@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
* desired free space is allocated.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * nblocks - actual number of contiguous free blocks desired.
- * l2nb - log2 number of contiguous free blocks desired.
- * results - on successful return, set to the starting block number
+ * bmp - pointer to bmap descriptor
+ * nblocks - actual number of contiguous free blocks desired.
+ * l2nb - log2 number of contiguous free blocks desired.
+ * results - on successful return, set to the starting block number
* of the newly allocated range.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*
* serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
*/
@@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
/*
* NAME: dbFindCtl()
*
- * FUNCTION: starting at a specified dmap control page level and block
+ * FUNCTION: starting at a specified dmap control page level and block
* number, search down the dmap control levels for a range of
- * contiguous free blocks large enough to satisfy an allocation
+ * contiguous free blocks large enough to satisfy an allocation
* request for the specified number of free blocks.
*
* if sufficient contiguous free blocks are found, this routine
@@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
* is sufficient in size.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * level - starting dmap control page level.
- * l2nb - log2 number of contiguous free blocks desired.
- * *blkno - on entry, starting block number for conducting the search.
+ * bmp - pointer to bmap descriptor
+ * level - starting dmap control page level.
+ * l2nb - log2 number of contiguous free blocks desired.
+ * *blkno - on entry, starting block number for conducting the search.
* on successful return, the first block within a dmap page
* that contains or starts a range of contiguous free blocks.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*
* serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
*/
@@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
/*
* NAME: dbAllocCtl()
*
- * FUNCTION: attempt to allocate a specified number of contiguous
+ * FUNCTION: attempt to allocate a specified number of contiguous
* blocks starting within a specific dmap.
*
* this routine is called by higher level routines that search
@@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
* first dmap (i.e. blkno).
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * nblocks - actual number of contiguous free blocks to allocate.
- * l2nb - log2 number of contiguous free blocks to allocate.
- * blkno - starting block number of the dmap to start the allocation
+ * bmp - pointer to bmap descriptor
+ * nblocks - actual number of contiguous free blocks to allocate.
+ * l2nb - log2 number of contiguous free blocks to allocate.
+ * blkno - starting block number of the dmap to start the allocation
* from.
- * results - on successful return, set to the starting block number
+ * results - on successful return, set to the starting block number
* of the newly allocated range.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*
* serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
*/
@@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
/*
* NAME: dbAllocDmapLev()
*
- * FUNCTION: attempt to allocate a specified number of contiguous blocks
+ * FUNCTION: attempt to allocate a specified number of contiguous blocks
* from a specified dmap.
*
* this routine checks if the contiguous blocks are available.
@@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
* returned.
*
* PARAMETERS:
- * mp - pointer to bmap descriptor
- * dp - pointer to dmap to attempt to allocate blocks from.
- * l2nb - log2 number of contiguous block desired.
- * nblocks - actual number of contiguous block desired.
- * results - on successful return, set to the starting block number
+ * mp - pointer to bmap descriptor
+ * dp - pointer to dmap to attempt to allocate blocks from.
+ * l2nb - log2 number of contiguous block desired.
+ * nblocks - actual number of contiguous block desired.
+ * results - on successful return, set to the starting block number
* of the newly allocated range.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
+ * 0 - success
+ * -ENOSPC - insufficient disk resources
+ * -EIO - i/o error
*
* serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
* IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
@@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp,
/*
* NAME: dbAllocDmap()
*
- * FUNCTION: adjust the disk allocation map to reflect the allocation
+ * FUNCTION: adjust the disk allocation map to reflect the allocation
* of a specified block range within a dmap.
*
* this routine allocates the specified blocks from the dmap
@@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp,
* covers this dmap.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * dp - pointer to dmap to allocate the block range from.
- * blkno - starting block number of the block to be allocated.
- * nblocks - number of blocks to be allocated.
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap to allocate the block range from.
+ * blkno - starting block number of the block to be allocated.
+ * nblocks - number of blocks to be allocated.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * 0 - success
+ * -EIO - i/o error
*
* serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
*/
@@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
/*
* NAME: dbFreeDmap()
*
- * FUNCTION: adjust the disk allocation map to reflect the allocation
+ * FUNCTION: adjust the disk allocation map to reflect the allocation
* of a specified block range within a dmap.
*
* this routine frees the specified blocks from the dmap through
@@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
* causes the maximum string of free blocks within the dmap to
* change (i.e. the value of the root of the dmap's dmtree), this
* routine will cause this change to be reflected up through the
- * appropriate levels of the dmap control pages by a call to
+ * appropriate levels of the dmap control pages by a call to
* dbAdjCtl() for the L0 dmap control page that covers this dmap.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * dp - pointer to dmap to free the block range from.
- * blkno - starting block number of the block to be freed.
- * nblocks - number of blocks to be freed.
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap to free the block range from.
+ * blkno - starting block number of the block to be freed.
+ * nblocks - number of blocks to be freed.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * 0 - success
+ * -EIO - i/o error
*
* serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
*/
@@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
/*
* NAME: dbAllocBits()
*
- * FUNCTION: allocate a specified block range from a dmap.
+ * FUNCTION: allocate a specified block range from a dmap.
*
* this routine updates the dmap to reflect the working
* state allocation of the specified block range. it directly
@@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
* dmap's dmtree, as a whole, to reflect the allocated range.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * dp - pointer to dmap to allocate bits from.
- * blkno - starting block number of the bits to be allocated.
- * nblocks - number of bits to be allocated.
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap to allocate bits from.
+ * blkno - starting block number of the bits to be allocated.
+ * nblocks - number of bits to be allocated.
*
* RETURN VALUES: none
*
@@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* the allocated words.
*/
for (; nwords > 0; nwords -= nw) {
- if (leaf[word] < BUDMIN) {
+ if (leaf[word] < BUDMIN) {
jfs_error(bmp->db_ipbmap->i_sb,
"dbAllocBits: leaf page "
"corrupt");
@@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
/*
* NAME: dbFreeBits()
*
- * FUNCTION: free a specified block range from a dmap.
+ * FUNCTION: free a specified block range from a dmap.
*
* this routine updates the dmap to reflect the working
* state allocation of the specified block range. it directly
@@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* dmtree, as a whole, to reflect the deallocated range.
*
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * dp - pointer to dmap to free bits from.
- * blkno - starting block number of the bits to be freed.
- * nblocks - number of bits to be freed.
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap to free bits from.
+ * blkno - starting block number of the bits to be freed.
+ * nblocks - number of bits to be freed.
*
* RETURN VALUES: 0 for success
*
@@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* the new root value and the next dmap control page level to
* be adjusted.
* PARAMETERS:
- * bmp - pointer to bmap descriptor
- * blkno - the first block of a block range within a dmap. it is
+ * bmp - pointer to bmap descriptor
+ * blkno - the first block of a block range within a dmap. it is
* the allocation or deallocation of this block range that
* requires the dmap control page to be adjusted.
- * newval - the new value of the lower level dmap or dmap control
+ * newval - the new value of the lower level dmap or dmap control
* page root.
- * alloc - 'true' if adjustment is due to an allocation.
- * level - current level of dmap control page (i.e. L0, L1, L2) to
+ * alloc - 'true' if adjustment is due to an allocation.
+ * level - current level of dmap control page (i.e. L0, L1, L2) to
* be adjusted.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * 0 - success
+ * -EIO - i/o error
*
* serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
*/
@@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
/*
* NAME: dbSplit()
*
- * FUNCTION: update the leaf of a dmtree with a new value, splitting
+ * FUNCTION: update the leaf of a dmtree with a new value, splitting
* the leaf from the binary buddy system of the dmtree's
* leaves, as required.
*
* PARAMETERS:
- * tp - pointer to the tree containing the leaf.
- * leafno - the number of the leaf to be updated.
- * splitsz - the size the binary buddy system starting at the leaf
+ * tp - pointer to the tree containing the leaf.
+ * leafno - the number of the leaf to be updated.
+ * splitsz - the size the binary buddy system starting at the leaf
* must be split to, specified as the log2 number of blocks.
- * newval - the new value for the leaf.
+ * newval - the new value for the leaf.
*
* RETURN VALUES: none
*
@@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
/*
* NAME: dbBackSplit()
*
- * FUNCTION: back split the binary buddy system of dmtree leaves
+ * FUNCTION: back split the binary buddy system of dmtree leaves
* that hold a specified leaf until the specified leaf
* starts its own binary buddy system.
*
@@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
* in which a previous join operation must be backed out.
*
* PARAMETERS:
- * tp - pointer to the tree containing the leaf.
- * leafno - the number of the leaf to be updated.
+ * tp - pointer to the tree containing the leaf.
+ * leafno - the number of the leaf to be updated.
*
* RETURN VALUES: none
*
@@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
/*
* NAME: dbJoin()
*
- * FUNCTION: update the leaf of a dmtree with a new value, joining
+ * FUNCTION: update the leaf of a dmtree with a new value, joining
* the leaf with other leaves of the dmtree into a multi-leaf
* binary buddy system, as required.
*
* PARAMETERS:
- * tp - pointer to the tree containing the leaf.
- * leafno - the number of the leaf to be updated.
- * newval - the new value for the leaf.
+ * tp - pointer to the tree containing the leaf.
+ * leafno - the number of the leaf to be updated.
+ * newval - the new value for the leaf.
*
* RETURN VALUES: none
*/
@@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
/*
* NAME: dbAdjTree()
*
- * FUNCTION: update a leaf of a dmtree with a new value, adjusting
+ * FUNCTION: update a leaf of a dmtree with a new value, adjusting
* the dmtree, as required, to reflect the new leaf value.
* the combination of any buddies must already be done before
* this is called.
*
* PARAMETERS:
- * tp - pointer to the tree to be adjusted.
- * leafno - the number of the leaf to be updated.
- * newval - the new value for the leaf.
+ * tp - pointer to the tree to be adjusted.
+ * leafno - the number of the leaf to be updated.
+ * newval - the new value for the leaf.
*
* RETURN VALUES: none
*/
@@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
/*
* NAME: dbFindLeaf()
*
- * FUNCTION: search a dmtree_t for sufficient free blocks, returning
+ * FUNCTION: search a dmtree_t for sufficient free blocks, returning
* the index of a leaf describing the free blocks if
* sufficient free blocks are found.
*
@@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
* free space.
*
* PARAMETERS:
- * tp - pointer to the tree to be searched.
- * l2nb - log2 number of free blocks to search for.
+ * tp - pointer to the tree to be searched.
+ * l2nb - log2 number of free blocks to search for.
* leafidx - return pointer to be set to the index of the leaf
* describing at least l2nb free blocks if sufficient
* free blocks are found.
*
* RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient free blocks.
+ * 0 - success
+ * -ENOSPC - insufficient free blocks.
*/
static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
{
@@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
/*
* NAME: dbFindBits()
*
- * FUNCTION: find a specified number of binary buddy free bits within a
+ * FUNCTION: find a specified number of binary buddy free bits within a
* dmap bitmap word value.
*
* this routine searches the bitmap value for (1 << l2nb) free
* bits at (1 << l2nb) alignments within the value.
*
* PARAMETERS:
- * word - dmap bitmap word value.
- * l2nb - number of free bits specified as a log2 number.
+ * word - dmap bitmap word value.
+ * l2nb - number of free bits specified as a log2 number.
*
* RETURN VALUES:
- * starting bit number of free bits.
+ * starting bit number of free bits.
*/
static int dbFindBits(u32 word, int l2nb)
{
@@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb)
/*
* NAME: dbMaxBud(u8 *cp)
*
- * FUNCTION: determine the largest binary buddy string of free
+ * FUNCTION: determine the largest binary buddy string of free
* bits within 32-bits of the map.
*
* PARAMETERS:
- * cp - pointer to the 32-bit value.
+ * cp - pointer to the 32-bit value.
*
* RETURN VALUES:
- * largest binary buddy of free bits within a dmap word.
+ * largest binary buddy of free bits within a dmap word.
*/
static int dbMaxBud(u8 * cp)
{
@@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp)
/*
* NAME: cnttz(uint word)
*
- * FUNCTION: determine the number of trailing zeros within a 32-bit
+ * FUNCTION: determine the number of trailing zeros within a 32-bit
* value.
*
* PARAMETERS:
- * value - 32-bit value to be examined.
+ * value - 32-bit value to be examined.
*
* RETURN VALUES:
- * count of trailing zeros
+ * count of trailing zeros
*/
static int cnttz(u32 word)
{
@@ -3025,14 +3018,14 @@ static int cnttz(u32 word)
/*
* NAME: cntlz(u32 value)
*
- * FUNCTION: determine the number of leading zeros within a 32-bit
+ * FUNCTION: determine the number of leading zeros within a 32-bit
* value.
*
* PARAMETERS:
- * value - 32-bit value to be examined.
+ * value - 32-bit value to be examined.
*
* RETURN VALUES:
- * count of leading zeros
+ * count of leading zeros
*/
static int cntlz(u32 value)
{
@@ -3050,14 +3043,14 @@ static int cntlz(u32 value)
* NAME: blkstol2(s64 nb)
*
* FUNCTION: convert a block count to its log2 value. if the block
- * count is not a l2 multiple, it is rounded up to the next
+ * count is not a l2 multiple, it is rounded up to the next
* larger l2 multiple.
*
* PARAMETERS:
- * nb - number of blocks
+ * nb - number of blocks
*
* RETURN VALUES:
- * log2 number of blocks
+ * log2 number of blocks
*/
static int blkstol2(s64 nb)
{
@@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb)
* at a time.
*
* PARAMETERS:
- * ip - pointer to in-core inode;
- * blkno - starting block number to be freed.
- * nblocks - number of blocks to be freed.
+ * ip - pointer to in-core inode;
+ * blkno - starting block number to be freed.
+ * nblocks - number of blocks to be freed.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error
+ * 0 - success
+ * -EIO - i/o error
*/
int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
{
@@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
* L2
* |
* L1---------------------------------L1
- * | |
- * L0---------L0---------L0 L0---------L0---------L0
- * | | | | | |
- * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
+ * | |
+ * L0---------L0---------L0 L0---------L0---------L0
+ * | | | | | |
+ * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
* L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
*
* <---old---><----------------------------extend----------------------->
@@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
(long long) blkno, (long long) nblocks, (long long) newsize);
/*
- * initialize bmap control page.
+ * initialize bmap control page.
*
* all the data in bmap control page should exclude
* the mkfs hidden dmap page.
@@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
/*
- * reconfigure db_agfree[]
+ * reconfigure db_agfree[]
* from old AG configuration to new AG configuration;
*
* coalesce contiguous k (newAGSize/oldAGSize) AGs;
@@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
bmp->db_maxag = bmp->db_maxag / k;
/*
- * extend bmap
+ * extend bmap
*
* update bit maps and corresponding level control pages;
* global control page db_nfree, db_agfree[agno], db_maxfreebud;
@@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
/* compute start L0 */
j = 0;
l1leaf = l1dcp->stree + CTLLEAFIND;
- p += nbperpage; /* 1st L0 of L1.k */
+ p += nbperpage; /* 1st L0 of L1.k */
}
/*
@@ -3548,7 +3541,7 @@ errout:
return -EIO;
/*
- * finalize bmap control page
+ * finalize bmap control page
*/
finalize:
@@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
int i, n;
/*
- * finalize bmap control page
+ * finalize bmap control page
*/
//finalize:
/*
@@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks)
* convert number of map pages to the zero origin top dmapctl level
*/
#define BMAPPGTOLEV(npages) \
- (((npages) <= 3 + MAXL0PAGES) ? 0 \
- : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
+ (((npages) <= 3 + MAXL0PAGES) ? 0 : \
+ ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
{
@@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
factor =
(i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
complete = (u32) npages / factor;
- ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL
- : ((i == 1) ? LPERCTL : 1));
+ ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
+ ((i == 1) ? LPERCTL : 1));
/* pages in last/incomplete child */
npages = (u32) npages % factor;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 45ea454c74b..11e6d471b36 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp)
* - 1 is added to account for the control page of the map.
*/
#define BLKTODMAP(b,s) \
- ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
+ ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
/*
* convert disk block number to the logical block number of the LEVEL 0
@@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp)
* - 1 is added to account for the control page of the map.
*/
#define BLKTOL0(b,s) \
- (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
+ (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
/*
* convert disk block number to the logical block number of the LEVEL 1
@@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp)
* at the specified level which describes the disk block.
*/
#define BLKTOCTL(b,s,l) \
- (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
+ (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
/*
* convert aggregate map size to the zero origin dmapctl level of the
@@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp)
* dmaptree must be consistent with dmapctl.
*/
struct dmaptree {
- __le32 nleafs; /* 4: number of tree leafs */
- __le32 l2nleafs; /* 4: l2 number of tree leafs */
- __le32 leafidx; /* 4: index of first tree leaf */
- __le32 height; /* 4: height of the tree */
+ __le32 nleafs; /* 4: number of tree leafs */
+ __le32 l2nleafs; /* 4: l2 number of tree leafs */
+ __le32 leafidx; /* 4: index of first tree leaf */
+ __le32 height; /* 4: height of the tree */
s8 budmin; /* 1: min l2 tree leaf value to combine */
- s8 stree[TREESIZE]; /* TREESIZE: tree */
- u8 pad[2]; /* 2: pad to word boundary */
-}; /* - 360 - */
+ s8 stree[TREESIZE]; /* TREESIZE: tree */
+ u8 pad[2]; /* 2: pad to word boundary */
+}; /* - 360 - */
/*
* dmap page per 8K blocks bitmap
*/
struct dmap {
- __le32 nblocks; /* 4: num blks covered by this dmap */
- __le32 nfree; /* 4: num of free blks in this dmap */
- __le64 start; /* 8: starting blkno for this dmap */
- struct dmaptree tree; /* 360: dmap tree */
- u8 pad[1672]; /* 1672: pad to 2048 bytes */
- __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */
- __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */
-}; /* - 4096 - */
+ __le32 nblocks; /* 4: num blks covered by this dmap */
+ __le32 nfree; /* 4: num of free blks in this dmap */
+ __le64 start; /* 8: starting blkno for this dmap */
+ struct dmaptree tree; /* 360: dmap tree */
+ u8 pad[1672]; /* 1672: pad to 2048 bytes */
+ __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */
+ __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */
+}; /* - 4096 - */
/*
* disk map control page per level.
@@ -173,14 +173,14 @@ struct dmap {
* dmapctl must be consistent with dmaptree.
*/
struct dmapctl {
- __le32 nleafs; /* 4: number of tree leafs */
- __le32 l2nleafs; /* 4: l2 number of tree leafs */
- __le32 leafidx; /* 4: index of the first tree leaf */
- __le32 height; /* 4: height of tree */
- s8 budmin; /* 1: minimum l2 tree leaf value */
- s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */
- u8 pad[2714]; /* 2714: pad to 4096 */
-}; /* - 4096 - */
+ __le32 nleafs; /* 4: number of tree leafs */
+ __le32 l2nleafs; /* 4: l2 number of tree leafs */
+ __le32 leafidx; /* 4: index of the first tree leaf */
+ __le32 height; /* 4: height of tree */
+ s8 budmin; /* 1: minimum l2 tree leaf value */
+ s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */
+ u8 pad[2714]; /* 2714: pad to 4096 */
+}; /* - 4096 - */
/*
* common definition for dmaptree within dmap and dmapctl
@@ -202,41 +202,41 @@ typedef union dmtree {
* on-disk aggregate disk allocation map descriptor.
*/
struct dbmap_disk {
- __le64 dn_mapsize; /* 8: number of blocks in aggregate */
- __le64 dn_nfree; /* 8: num free blks in aggregate map */
- __le32 dn_l2nbperpage; /* 4: number of blks per page */
- __le32 dn_numag; /* 4: total number of ags */
- __le32 dn_maxlevel; /* 4: number of active ags */
- __le32 dn_maxag; /* 4: max active alloc group number */
- __le32 dn_agpref; /* 4: preferred alloc group (hint) */
- __le32 dn_aglevel; /* 4: dmapctl level holding the AG */
- __le32 dn_agheigth; /* 4: height in dmapctl of the AG */
- __le32 dn_agwidth; /* 4: width in dmapctl of the AG */
- __le32 dn_agstart; /* 4: start tree index at AG height */
- __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
- __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */
- __le64 dn_agsize; /* 8: num of blks per alloc group */
- s8 dn_maxfreebud; /* 1: max free buddy system */
- u8 pad[3007]; /* 3007: pad to 4096 */
-}; /* - 4096 - */
+ __le64 dn_mapsize; /* 8: number of blocks in aggregate */
+ __le64 dn_nfree; /* 8: num free blks in aggregate map */
+ __le32 dn_l2nbperpage; /* 4: number of blks per page */
+ __le32 dn_numag; /* 4: total number of ags */
+ __le32 dn_maxlevel; /* 4: number of active ags */
+ __le32 dn_maxag; /* 4: max active alloc group number */
+ __le32 dn_agpref; /* 4: preferred alloc group (hint) */
+ __le32 dn_aglevel; /* 4: dmapctl level holding the AG */
+ __le32 dn_agheigth; /* 4: height in dmapctl of the AG */
+ __le32 dn_agwidth; /* 4: width in dmapctl of the AG */
+ __le32 dn_agstart; /* 4: start tree index at AG height */
+ __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
+ __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */
+ __le64 dn_agsize; /* 8: num of blks per alloc group */
+ s8 dn_maxfreebud; /* 1: max free buddy system */
+ u8 pad[3007]; /* 3007: pad to 4096 */
+}; /* - 4096 - */
struct dbmap {
- s64 dn_mapsize; /* number of blocks in aggregate */
- s64 dn_nfree; /* num free blks in aggregate map */
- int dn_l2nbperpage; /* number of blks per page */
- int dn_numag; /* total number of ags */
- int dn_maxlevel; /* number of active ags */
- int dn_maxag; /* max active alloc group number */
- int dn_agpref; /* preferred alloc group (hint) */
- int dn_aglevel; /* dmapctl level holding the AG */
- int dn_agheigth; /* height in dmapctl of the AG */
- int dn_agwidth; /* width in dmapctl of the AG */
- int dn_agstart; /* start tree index at AG height */
- int dn_agl2size; /* l2 num of blks per alloc group */
- s64 dn_agfree[MAXAG]; /* per AG free count */
- s64 dn_agsize; /* num of blks per alloc group */
- signed char dn_maxfreebud; /* max free buddy system */
-}; /* - 4096 - */
+ s64 dn_mapsize; /* number of blocks in aggregate */
+ s64 dn_nfree; /* num free blks in aggregate map */
+ int dn_l2nbperpage; /* number of blks per page */
+ int dn_numag; /* total number of ags */
+ int dn_maxlevel; /* number of active ags */
+ int dn_maxag; /* max active alloc group number */
+ int dn_agpref; /* preferred alloc group (hint) */
+ int dn_aglevel; /* dmapctl level holding the AG */
+ int dn_agheigth; /* height in dmapctl of the AG */
+ int dn_agwidth; /* width in dmapctl of the AG */
+ int dn_agstart; /* start tree index at AG height */
+ int dn_agl2size; /* l2 num of blks per alloc group */
+ s64 dn_agfree[MAXAG]; /* per AG free count */
+ s64 dn_agsize; /* num of blks per alloc group */
+ signed char dn_maxfreebud; /* max free buddy system */
+}; /* - 4096 - */
/*
* in-memory aggregate disk allocation map descriptor.
*/
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 6d62f322289..c14ba3cfa81 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
lv = &llck->lv[llck->index];
/*
- * Linelock slot size is twice the size of directory table
- * slot size. 512 entries per page.
+ * Linelock slot size is twice the size of directory table
+ * slot size. 512 entries per page.
*/
lv->offset = ((index - 2) & 511) >> 1;
lv->length = 1;
@@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
btstack->nsplit = 1;
/*
- * search down tree from root:
+ * search down tree from root:
*
* between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
* internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
}
if (cmp == 0) {
/*
- * search hit
+ * search hit
*/
/* search hit - leaf page:
* return the entry found
@@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
}
/*
- * search miss
+ * search miss
*
* base is the smallest index with key (Kj) greater than
* search key (K) and may be zero or (maxindex + 1) index.
@@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip,
struct lv *lv;
/*
- * retrieve search result
+ * retrieve search result
*
* dtSearch() returns (leaf page pinned, index at which to insert).
* n.b. dtSearch() may return index of (maxindex + 1) of
@@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip,
DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
/*
- * insert entry for new key
+ * insert entry for new key
*/
if (DO_INDEX(ip)) {
if (JFS_IP(ip)->next_index == DIREND) {
@@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip,
data.leaf.ino = *fsn;
/*
- * leaf page does not have enough room for new entry:
+ * leaf page does not have enough room for new entry:
*
- * extend/split the leaf page;
+ * extend/split the leaf page;
*
* dtSplitUp() will insert the entry and unpin the leaf page.
*/
@@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip,
}
/*
- * leaf page does have enough room for new entry:
+ * leaf page does have enough room for new entry:
*
- * insert the new data entry into the leaf page;
+ * insert the new data entry into the leaf page;
*/
BT_MARK_DIRTY(mp, ip);
/*
@@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid,
}
/*
- * split leaf page
+ * split leaf page
*
* The split routines insert the new entry, and
* acquire txLock as appropriate.
*/
/*
- * split root leaf page:
+ * split root leaf page:
*/
if (sp->header.flag & BT_ROOT) {
/*
@@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid,
}
/*
- * extend first leaf page
+ * extend first leaf page
*
* extend the 1st extent if less than buffer page size
* (dtExtendPage() reurns leaf page unpinned)
@@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid,
}
/*
- * split leaf page <sp> into <sp> and a new right page <rp>.
+ * split leaf page <sp> into <sp> and a new right page <rp>.
*
* return <rp> pinned and its extent descriptor <rpxd>
*/
@@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
rp->header.freecnt = rp->header.maxslot - fsi;
/*
- * sequential append at tail: append without split
+ * sequential append at tail: append without split
*
* If splitting the last page on a level because of appending
* a entry to it (skip is maxentry), it's likely that the access is
@@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
}
/*
- * non-sequential insert (at possibly middle page)
+ * non-sequential insert (at possibly middle page)
*/
/*
@@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
left = 0;
/*
- * compute fill factor for split pages
+ * compute fill factor for split pages
*
* <nxt> traces the next entry to move to rp
* <off> traces the next entry to stay in sp
@@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
/* <nxt> poins to the 1st entry to move */
/*
- * move entries to right page
+ * move entries to right page
*
* dtMoveEntry() initializes rp and reserves entry for insertion
*
@@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid,
return (rc);
/*
- * extend the extent
+ * extend the extent
*/
pxdlist = split->pxdlist;
pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid,
}
/*
- * extend the page
+ * extend the page
*/
sp->header.self = *pxd;
@@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid,
/* update buffer extent descriptor of extended page */
xlen = lengthPXD(pxd);
xsize = xlen << JFS_SBI(sb)->l2bsize;
-#ifdef _STILL_TO_PORT
- bmSetXD(smp, xaddr, xsize);
-#endif /* _STILL_TO_PORT */
/*
* copy old stbl to new stbl at start of extended area
@@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid,
}
/*
- * update parent entry on the parent/root page
+ * update parent entry on the parent/root page
*/
/*
* acquire a transaction lock on the parent/root page
@@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid,
sp = &JFS_IP(ip)->i_dtroot;
/*
- * allocate/initialize a single (right) child page
+ * allocate/initialize a single (right) child page
*
* N.B. at first split, a one (or two) block to fit new entry
* is allocated; at subsequent split, a full page is allocated;
@@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid,
rp->header.prev = 0;
/*
- * move in-line root page into new right page extent
+ * move in-line root page into new right page extent
*/
/* linelock header + copied entries + new stbl (1st slot) in new page */
ASSERT(dtlck->index == 0);
@@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid,
dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
/*
- * reset parent/root page
+ * reset parent/root page
*
* set the 1st entry offset to 0, which force the left-most key
* at any level of the tree to be less than any search key.
@@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid,
dtpage_t *np;
/*
- * search for the entry to delete:
+ * search for the entry to delete:
*
* dtSearch() returns (leaf page pinned, index at which to delete).
*/
@@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
int i;
/*
- * keep the root leaf page which has become empty
+ * keep the root leaf page which has become empty
*/
if (BT_IS_ROOT(fmp)) {
/*
@@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
}
/*
- * free the non-root leaf page
+ * free the non-root leaf page
*/
/*
* acquire a transaction lock on the page
@@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
discard_metapage(fmp);
/*
- * propagate page deletion up the directory tree
+ * propagate page deletion up the directory tree
*
* If the delete from the parent page makes it empty,
* continue all the way up the tree.
@@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
#ifdef _NOTYET
/*
- * NAME: dtRelocate()
+ * NAME: dtRelocate()
*
- * FUNCTION: relocate dtpage (internal or leaf) of directory;
- * This function is mainly used by defragfs utility.
+ * FUNCTION: relocate dtpage (internal or leaf) of directory;
+ * This function is mainly used by defragfs utility.
*/
int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
s64 nxaddr)
@@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
xlen);
/*
- * 1. get the internal parent dtpage covering
- * router entry for the tartget page to be relocated;
+ * 1. get the internal parent dtpage covering
+ * router entry for the tartget page to be relocated;
*/
rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
if (rc)
@@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
jfs_info("dtRelocate: parent router entry validated.");
/*
- * 2. relocate the target dtpage
+ * 2. relocate the target dtpage
*/
/* read in the target page from src extent */
DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
@@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
/* update the buffer extent descriptor of the dtpage */
xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
-#ifdef _STILL_TO_PORT
- bmSetXD(mp, nxaddr, xsize);
-#endif /* _STILL_TO_PORT */
+
/* unpin the relocated page */
DT_PUTPAGE(mp);
jfs_info("dtRelocate: target dtpage relocated.");
@@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
*/
/*
- * 3. acquire maplock for the source extent to be freed;
+ * 3. acquire maplock for the source extent to be freed;
*/
/* for dtpage relocation, write a LOG_NOREDOPAGE record
* for the source dtpage (logredo() will init NoRedoPage
@@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
pxdlock->index = 1;
/*
- * 4. update the parent router entry for relocation;
+ * 4. update the parent router entry for relocation;
*
* acquire tlck for the parent entry covering the target dtpage;
* write LOG_REDOPAGE to apply after image only;
@@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
* NAME: dtSearchNode()
*
* FUNCTION: Search for an dtpage containing a specified address
- * This function is mainly used by defragfs utility.
+ * This function is mainly used by defragfs utility.
*
* NOTE: Search result on stack, the found page is pinned at exit.
* The result page must be an internal dtpage.
@@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
BT_CLR(btstack); /* reset stack */
/*
- * descend tree to the level with specified leftmost page
+ * descend tree to the level with specified leftmost page
*
* by convention, root bn = 0.
*/
@@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
}
/*
- * search each page at the current levevl
+ * search each page at the current levevl
*/
loop:
stbl = DT_GETSTBL(p);
@@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (DO_INDEX(ip)) {
/*
* persistent index is stored in directory entries.
- * Special cases: 0 = .
- * 1 = ..
- * -1 = End of directory
+ * Special cases: 0 = .
+ * 1 = ..
+ * -1 = End of directory
*/
do_index = 1;
@@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
/*
* Legacy filesystem - OS/2 & Linux JFS < 0.3.6
*
- * pn = index = 0: First entry "."
- * pn = 0; index = 1: Second entry ".."
- * pn > 0: Real entries, pn=1 -> leftmost page
- * pn = index = -1: No more entries
+ * pn = index = 0: First entry "."
+ * pn = 0; index = 1: Second entry ".."
+ * pn > 0: Real entries, pn=1 -> leftmost page
+ * pn = index = -1: No more entries
*/
dtpos = filp->f_pos;
if (dtpos == 0) {
@@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack)
BT_CLR(btstack); /* reset stack */
/*
- * descend leftmost path of the tree
+ * descend leftmost path of the tree
*
* by convention, root bn = 0.
*/
@@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip,
struct ldtentry *entry;
/*
- * search for the entry to modify:
+ * search for the entry to modify:
*
* dtSearch() returns (leaf page pinned, index at which to modify).
*/
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index af8513f7864..8561c6ecece 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -35,7 +35,7 @@ typedef union {
/*
- * entry segment/slot
+ * entry segment/slot
*
* an entry consists of type dependent head/only segment/slot and
* additional segments/slots linked vi next field;
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index a35bdca6a80..7ae1e3281de 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
#endif
static s64 extRoundDown(s64 nb);
-#define DPD(a) (printk("(a): %d\n",(a)))
-#define DPC(a) (printk("(a): %c\n",(a)))
+#define DPD(a) (printk("(a): %d\n",(a)))
+#define DPC(a) (printk("(a): %c\n",(a)))
#define DPL1(a) \
{ \
if ((a) >> 32) \
@@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb);
printk("(a): %x\n",(a) << 32); \
}
-#define DPD1(a) (printk("(a): %d ",(a)))
-#define DPX(a) (printk("(a): %08x\n",(a)))
-#define DPX1(a) (printk("(a): %08x ",(a)))
-#define DPS(a) (printk("%s\n",(a)))
-#define DPE(a) (printk("\nENTERING: %s\n",(a)))
-#define DPE1(a) (printk("\nENTERING: %s",(a)))
-#define DPS1(a) (printk(" %s ",(a)))
+#define DPD1(a) (printk("(a): %d ",(a)))
+#define DPX(a) (printk("(a): %08x\n",(a)))
+#define DPX1(a) (printk("(a): %08x ",(a)))
+#define DPS(a) (printk("%s\n",(a)))
+#define DPE(a) (printk("\nENTERING: %s\n",(a)))
+#define DPE1(a) (printk("\nENTERING: %s",(a)))
+#define DPS1(a) (printk(" %s ",(a)))
/*
* NAME: extAlloc()
*
- * FUNCTION: allocate an extent for a specified page range within a
+ * FUNCTION: allocate an extent for a specified page range within a
* file.
*
* PARAMETERS:
@@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb);
* should be marked as allocated but not recorded.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
+ * 0 - success
+ * -EIO - i/o error.
+ * -ENOSPC - insufficient disk resources.
*/
int
extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
@@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
#ifdef _NOTYET
/*
- * NAME: extRealloc()
+ * NAME: extRealloc()
*
- * FUNCTION: extend the allocation of a file extent containing a
+ * FUNCTION: extend the allocation of a file extent containing a
* partial back last page.
*
* PARAMETERS:
@@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
* should be marked as allocated but not recorded.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
+ * 0 - success
+ * -EIO - i/o error.
+ * -ENOSPC - insufficient disk resources.
*/
int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
{
@@ -345,9 +345,9 @@ exit:
/*
- * NAME: extHint()
+ * NAME: extHint()
*
- * FUNCTION: produce an extent allocation hint for a file offset.
+ * FUNCTION: produce an extent allocation hint for a file offset.
*
* PARAMETERS:
* ip - the inode of the file.
@@ -356,8 +356,8 @@ exit:
* the hint.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
+ * 0 - success
+ * -EIO - i/o error.
*/
int extHint(struct inode *ip, s64 offset, xad_t * xp)
{
@@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
lxdl.nlxd = 1;
lxdl.lxd = &lxd;
LXDoffset(&lxd, prev)
- LXDlength(&lxd, nbperpage);
+ LXDlength(&lxd, nbperpage);
xadl.maxnxad = 1;
xadl.nxad = 0;
@@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
return (rc);
- /* check if not extent exists for the previous page.
+ /* check if no extent exists for the previous page.
* this is possible for sparse files.
*/
if (xadl.nxad == 0) {
-// assert(ISSPARSE(ip));
+// assert(ISSPARSE(ip));
return (0);
}
@@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
*/
xp->flag &= XAD_NOTRECORDED;
- if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
+ if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
jfs_error(ip->i_sb, "extHint: corrupt xtree");
return -EIO;
- }
+ }
return (0);
}
/*
- * NAME: extRecord()
+ * NAME: extRecord()
*
- * FUNCTION: change a page with a file from not recorded to recorded.
+ * FUNCTION: change a page with a file from not recorded to recorded.
*
* PARAMETERS:
* ip - inode of the file.
* cp - cbuf of the file page.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
+ * 0 - success
+ * -EIO - i/o error.
+ * -ENOSPC - insufficient disk resources.
*/
int extRecord(struct inode *ip, xad_t * xp)
{
@@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp)
#ifdef _NOTYET
/*
- * NAME: extFill()
+ * NAME: extFill()
*
- * FUNCTION: allocate disk space for a file page that represents
+ * FUNCTION: allocate disk space for a file page that represents
* a file hole.
*
* PARAMETERS:
@@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp)
* cp - cbuf of the file page represent the hole.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
+ * 0 - success
+ * -EIO - i/o error.
+ * -ENOSPC - insufficient disk resources.
*/
int extFill(struct inode *ip, xad_t * xp)
{
int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
-// assert(ISSPARSE(ip));
+// assert(ISSPARSE(ip));
/* initialize the extent allocation hint */
XADaddress(xp, 0);
@@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp)
/*
* NAME: extBalloc()
*
- * FUNCTION: allocate disk blocks to form an extent.
+ * FUNCTION: allocate disk blocks to form an extent.
*
* initially, we will try to allocate disk blocks for the
* requested size (nblocks). if this fails (nblocks
@@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp)
* allocated block range.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
+ * 0 - success
+ * -EIO - i/o error.
+ * -ENOSPC - insufficient disk resources.
*/
static int
extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
@@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
/*
* NAME: extBrealloc()
*
- * FUNCTION: attempt to extend an extent's allocation.
+ * FUNCTION: attempt to extend an extent's allocation.
*
* Initially, we will try to extend the extent's allocation
* in place. If this fails, we'll try to move the extent
@@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
*
* PARAMETERS:
* ip - the inode of the file.
- * blkno - starting block number of the extents current allocation.
- * nblks - number of blocks within the extents current allocation.
+ * blkno - starting block number of the extents current allocation.
+ * nblks - number of blocks within the extents current allocation.
* newnblks - pointer to a s64 value. on entry, this value is the
* the new desired extent size (number of blocks). on
* successful exit, this value is set to the extent's actual
@@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
* newblkno - the starting block number of the extents new allocation.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
+ * 0 - success
+ * -EIO - i/o error.
+ * -ENOSPC - insufficient disk resources.
*/
static int
extBrealloc(struct inode *ip,
@@ -634,16 +634,16 @@ extBrealloc(struct inode *ip,
/*
- * NAME: extRoundDown()
+ * NAME: extRoundDown()
*
- * FUNCTION: round down a specified number of blocks to the next
+ * FUNCTION: round down a specified number of blocks to the next
* smallest power of 2 number.
*
* PARAMETERS:
* nb - the inode of the file.
*
* RETURN VALUES:
- * next smallest power of 2 number.
+ * next smallest power of 2 number.
*/
static s64 extRoundDown(s64 nb)
{
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 38f70ac03be..b3f5463fbe5 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -34,9 +34,9 @@
#define JFS_UNICODE 0x00000001 /* unicode name */
/* mount time flags for error handling */
-#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */
-#define JFS_ERR_CONTINUE 0x00000004 /* continue */
-#define JFS_ERR_PANIC 0x00000008 /* panic */
+#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */
+#define JFS_ERR_CONTINUE 0x00000004 /* continue */
+#define JFS_ERR_PANIC 0x00000008 /* panic */
/* Quota support */
#define JFS_USRQUOTA 0x00000010
@@ -83,7 +83,6 @@
/* case-insensitive name/directory support */
#define JFS_AIX 0x80000000 /* AIX support */
-/* POSIX name/directory support - Never implemented*/
/*
* buffer cache configuration
@@ -113,10 +112,10 @@
#define IDATASIZE 256 /* inode inline data size */
#define IXATTRSIZE 128 /* inode inline extended attribute size */
-#define XTPAGE_SIZE 4096
-#define log2_PAGESIZE 12
+#define XTPAGE_SIZE 4096
+#define log2_PAGESIZE 12
-#define IAG_SIZE 4096
+#define IAG_SIZE 4096
#define IAG_EXTENT_SIZE 4096
#define INOSPERIAG 4096 /* number of disk inodes per iag */
#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index c6530227cda..3870ba8b908 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *);
static void copy_to_dinode(struct dinode *, struct inode *);
/*
- * NAME: diMount()
+ * NAME: diMount()
*
- * FUNCTION: initialize the incore inode map control structures for
+ * FUNCTION: initialize the incore inode map control structures for
* a fileset or aggregate init time.
*
- * the inode map's control structure (dinomap) is
- * brought in from disk and placed in virtual memory.
+ * the inode map's control structure (dinomap) is
+ * brought in from disk and placed in virtual memory.
*
* PARAMETERS:
- * ipimap - pointer to inode map inode for the aggregate or fileset.
+ * ipimap - pointer to inode map inode for the aggregate or fileset.
*
* RETURN VALUES:
- * 0 - success
- * -ENOMEM - insufficient free virtual memory.
- * -EIO - i/o error.
+ * 0 - success
+ * -ENOMEM - insufficient free virtual memory.
+ * -EIO - i/o error.
*/
int diMount(struct inode *ipimap)
{
@@ -180,18 +180,18 @@ int diMount(struct inode *ipimap)
/*
- * NAME: diUnmount()
+ * NAME: diUnmount()
*
- * FUNCTION: write to disk the incore inode map control structures for
+ * FUNCTION: write to disk the incore inode map control structures for
* a fileset or aggregate at unmount time.
*
* PARAMETERS:
- * ipimap - pointer to inode map inode for the aggregate or fileset.
+ * ipimap - pointer to inode map inode for the aggregate or fileset.
*
* RETURN VALUES:
- * 0 - success
- * -ENOMEM - insufficient free virtual memory.
- * -EIO - i/o error.
+ * 0 - success
+ * -ENOMEM - insufficient free virtual memory.
+ * -EIO - i/o error.
*/
int diUnmount(struct inode *ipimap, int mounterror)
{
@@ -274,9 +274,9 @@ int diSync(struct inode *ipimap)
/*
- * NAME: diRead()
+ * NAME: diRead()
*
- * FUNCTION: initialize an incore inode from disk.
+ * FUNCTION: initialize an incore inode from disk.
*
* on entry, the specifed incore inode should itself
* specify the disk inode number corresponding to the
@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
* this routine handles incore inode initialization for
* both "special" and "regular" inodes. special inodes
* are those required early in the mount process and
- * require special handling since much of the file system
+ * require special handling since much of the file system
* is not yet initialized. these "special" inodes are
* identified by a NULL inode map inode pointer and are
* actually initialized by a call to diReadSpecial().
@@ -298,12 +298,12 @@ int diSync(struct inode *ipimap)
* incore inode.
*
* PARAMETERS:
- * ip - pointer to incore inode to be initialized from disk.
+ * ip - pointer to incore inode to be initialized from disk.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOMEM - insufficient memory
+ * 0 - success
+ * -EIO - i/o error.
+ * -ENOMEM - insufficient memory
*
*/
int diRead(struct inode *ip)
@@ -410,26 +410,26 @@ int diRead(struct inode *ip)
/*
- * NAME: diReadSpecial()
+ * NAME: diReadSpecial()
*
- * FUNCTION: initialize a 'special' inode from disk.
+ * FUNCTION: initialize a 'special' inode from disk.
*
* this routines handles aggregate level inodes. The
* inode cache cannot differentiate between the
* aggregate inodes and the filesystem inodes, so we
* handle these here. We don't actually use the aggregate
- * inode map, since these inodes are at a fixed location
+ * inode map, since these inodes are at a fixed location
* and in some cases the aggregate inode map isn't initialized
* yet.
*
* PARAMETERS:
- * sb - filesystem superblock
+ * sb - filesystem superblock
* inum - aggregate inode number
* secondary - 1 if secondary aggregate inode table
*
* RETURN VALUES:
- * new inode - success
- * NULL - i/o error.
+ * new inode - success
+ * NULL - i/o error.
*/
struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
{
@@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
}
/*
- * NAME: diWriteSpecial()
+ * NAME: diWriteSpecial()
*
- * FUNCTION: Write the special inode to disk
+ * FUNCTION: Write the special inode to disk
*
* PARAMETERS:
- * ip - special inode
+ * ip - special inode
* secondary - 1 if secondary aggregate inode table
*
* RETURN VALUES: none
@@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary)
}
/*
- * NAME: diFreeSpecial()
+ * NAME: diFreeSpecial()
*
- * FUNCTION: Free allocated space for special inode
+ * FUNCTION: Free allocated space for special inode
*/
void diFreeSpecial(struct inode *ip)
{
@@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip)
/*
- * NAME: diWrite()
+ * NAME: diWrite()
*
- * FUNCTION: write the on-disk inode portion of the in-memory inode
+ * FUNCTION: write the on-disk inode portion of the in-memory inode
* to its corresponding on-disk inode.
*
* on entry, the specifed incore inode should itself
@@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip)
*
* PARAMETERS:
* tid - transacation id
- * ip - pointer to incore inode to be written to the inode extent.
+ * ip - pointer to incore inode to be written to the inode extent.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
+ * 0 - success
+ * -EIO - i/o error.
*/
int diWrite(tid_t tid, struct inode *ip)
{
@@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip)
ilinelock = (struct linelock *) & tlck->lock;
/*
- * regular file: 16 byte (XAD slot) granularity
+ * regular file: 16 byte (XAD slot) granularity
*/
if (type & tlckXTREE) {
xtpage_t *p, *xp;
@@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip)
xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
}
/*
- * directory: 32 byte (directory entry slot) granularity
+ * directory: 32 byte (directory entry slot) granularity
*/
else if (type & tlckDTREE) {
dtpage_t *p, *xp;
@@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip)
}
/*
- * lock/copy inode base: 128 byte slot granularity
+ * lock/copy inode base: 128 byte slot granularity
*/
-// baseDinode:
lv = & dilinelock->lv[dilinelock->index];
lv->offset = dioffset >> L2INODESLOTSIZE;
copy_to_dinode(dp, ip);
@@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip)
lv->length = 1;
dilinelock->index++;
-#ifdef _JFS_FASTDASD
- /*
- * We aren't logging changes to the DASD used in directory inodes,
- * but we need to write them to disk. If we don't unmount cleanly,
- * mount will recalculate the DASD used.
- */
- if (S_ISDIR(ip->i_mode)
- && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
- memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
-#endif /* _JFS_FASTDASD */
-
/* release the buffer holding the updated on-disk inode.
* the buffer will be later written by commit processing.
*/
@@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip)
/*
- * NAME: diFree(ip)
+ * NAME: diFree(ip)
*
- * FUNCTION: free a specified inode from the inode working map
+ * FUNCTION: free a specified inode from the inode working map
* for a fileset or aggregate.
*
* if the inode to be freed represents the first (only)
@@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip)
* any updates and are held until all updates are complete.
*
* PARAMETERS:
- * ip - inode to be freed.
+ * ip - inode to be freed.
*
* RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
+ * 0 - success
+ * -EIO - i/o error.
*/
int diFree(struct inode *ip)
{
@@ -902,7 +890,8 @@ int diFree(struct inode *ip)
* the map.
*/
if (iagno >= imap->im_nextiag) {
- dump_mem("imap", imap, 32);
+ print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ imap, 32, 0);
jfs_error(ip->i_sb,
"diFree: inum = %d, iagno = %d, nextiag = %d",
(uint) inum, iagno, imap->im_nextiag);
@@ -964,8 +953,8 @@ int diFree(struct inode *ip)
return -EIO;
}
/*
- * inode extent still has some inodes or below low water mark:
- * keep the inode extent;
+ * inode extent still has some inodes or below low water mark:
+ * keep the inode extent;
*/
if (bitmap ||
imap->im_agctl[agno].numfree < 96 ||
@@ -1047,12 +1036,12 @@ int diFree(struct inode *ip)
/*
- * inode extent has become free and above low water mark:
- * free the inode extent;
+ * inode extent has become free and above low water mark:
+ * free the inode extent;
*/
/*
- * prepare to update iag list(s) (careful update step 1)
+ * prepare to update iag list(s) (careful update step 1)
*/
amp = bmp = cmp = dmp = NULL;
fwd = back = -1;
@@ -1152,7 +1141,7 @@ int diFree(struct inode *ip)
invalidate_pxd_metapages(ip, freepxd);
/*
- * update iag list(s) (careful update step 2)
+ * update iag list(s) (careful update step 2)
*/
/* add the iag to the ag extent free list if this is the
* first free extent for the iag.
@@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
/*
- * NAME: diAlloc(pip,dir,ip)
+ * NAME: diAlloc(pip,dir,ip)
*
- * FUNCTION: allocate a disk inode from the inode working map
+ * FUNCTION: allocate a disk inode from the inode working map
* for a fileset or aggregate.
*
* PARAMETERS:
- * pip - pointer to incore inode for the parent inode.
- * dir - 'true' if the new disk inode is for a directory.
- * ip - pointer to a new inode
+ * pip - pointer to incore inode for the parent inode.
+ * dir - 'true' if the new disk inode is for a directory.
+ * ip - pointer to a new inode
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*/
int diAlloc(struct inode *pip, bool dir, struct inode *ip)
{
@@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
/*
- * try to allocate from the IAG
+ * try to allocate from the IAG
*/
/* check if the inode may be allocated from the iag
* (i.e. the inode has free inodes or new extent can be added).
@@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
/*
- * NAME: diAllocAG(imap,agno,dir,ip)
+ * NAME: diAllocAG(imap,agno,dir,ip)
*
- * FUNCTION: allocate a disk inode from the allocation group.
+ * FUNCTION: allocate a disk inode from the allocation group.
*
* this routine first determines if a new extent of free
* inodes should be added for the allocation group, with
@@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
* PRE CONDITION: Already have the AG lock for this AG.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * agno - allocation group to allocate from.
- * dir - 'true' if the new disk inode is for a directory.
- * ip - pointer to the new inode to be filled in on successful return
+ * imap - pointer to inode map control structure.
+ * agno - allocation group to allocate from.
+ * dir - 'true' if the new disk inode is for a directory.
+ * ip - pointer to the new inode to be filled in on successful return
* with the disk inode number allocated, its extent address
* and the start of the ag.
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*/
static int
diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
/*
- * NAME: diAllocAny(imap,agno,dir,iap)
+ * NAME: diAllocAny(imap,agno,dir,iap)
*
- * FUNCTION: allocate a disk inode from any other allocation group.
+ * FUNCTION: allocate a disk inode from any other allocation group.
*
* this routine is called when an allocation attempt within
* the primary allocation group has failed. if attempts to
@@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
* specified primary group.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * agno - primary allocation group (to avoid).
- * dir - 'true' if the new disk inode is for a directory.
- * ip - pointer to a new inode to be filled in on successful return
+ * imap - pointer to inode map control structure.
+ * agno - primary allocation group (to avoid).
+ * dir - 'true' if the new disk inode is for a directory.
+ * ip - pointer to a new inode to be filled in on successful return
* with the disk inode number allocated, its extent address
* and the start of the ag.
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*/
static int
diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
/*
- * NAME: diAllocIno(imap,agno,ip)
+ * NAME: diAllocIno(imap,agno,ip)
*
- * FUNCTION: allocate a disk inode from the allocation group's free
+ * FUNCTION: allocate a disk inode from the allocation group's free
* inode list, returning an error if this free list is
* empty (i.e. no iags on the list).
*
@@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
* PRE CONDITION: Already have AG lock for this AG.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * agno - allocation group.
- * ip - pointer to new inode to be filled in on successful return
+ * imap - pointer to inode map control structure.
+ * agno - allocation group.
+ * ip - pointer to new inode to be filled in on successful return
* with the disk inode number allocated, its extent address
* and the start of the ag.
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*/
static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
{
@@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
/*
- * NAME: diAllocExt(imap,agno,ip)
+ * NAME: diAllocExt(imap,agno,ip)
*
* FUNCTION: add a new extent of free inodes to an iag, allocating
* an inode from this extent to satisfy the current allocation
@@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
* for the purpose of satisfying this request.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * agno - allocation group number.
- * ip - pointer to new inode to be filled in on successful return
+ * imap - pointer to inode map control structure.
+ * agno - allocation group number.
+ * ip - pointer to new inode to be filled in on successful return
* with the disk inode number allocated, its extent address
* and the start of the ag.
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*/
static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
{
@@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
/*
- * NAME: diAllocBit(imap,iagp,ino)
+ * NAME: diAllocBit(imap,iagp,ino)
*
* FUNCTION: allocate a backed inode from an iag.
*
@@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
* this AG. Must have read lock on imap inode.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * iagp - pointer to iag.
- * ino - inode number to be allocated within the iag.
+ * imap - pointer to inode map control structure.
+ * iagp - pointer to iag.
+ * ino - inode number to be allocated within the iag.
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*/
static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
{
@@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
/*
- * NAME: diNewExt(imap,iagp,extno)
+ * NAME: diNewExt(imap,iagp,extno)
*
- * FUNCTION: initialize a new extent of inodes for an iag, allocating
- * the first inode of the extent for use for the current
- * allocation request.
+ * FUNCTION: initialize a new extent of inodes for an iag, allocating
+ * the first inode of the extent for use for the current
+ * allocation request.
*
* disk resources are allocated for the new extent of inodes
* and the inodes themselves are initialized to reflect their
@@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
* this AG. Must have read lock on imap inode.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * iagp - pointer to iag.
- * extno - extent number.
+ * imap - pointer to inode map control structure.
+ * iagp - pointer to iag.
+ * extno - extent number.
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*/
static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
{
@@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
/*
- * NAME: diNewIAG(imap,iagnop,agno)
+ * NAME: diNewIAG(imap,iagnop,agno)
*
* FUNCTION: allocate a new iag for an allocation group.
*
@@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
* and returned to satisfy the request.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * iagnop - pointer to an iag number set with the number of the
+ * imap - pointer to inode map control structure.
+ * iagnop - pointer to an iag number set with the number of the
* newly allocated iag upon successful return.
- * agno - allocation group number.
+ * agno - allocation group number.
* bpp - Buffer pointer to be filled in with new IAG's buffer
*
* RETURN VALUES:
- * 0 - success.
- * -ENOSPC - insufficient disk resources.
- * -EIO - i/o error.
+ * 0 - success.
+ * -ENOSPC - insufficient disk resources.
+ * -EIO - i/o error.
*
* serialization:
* AG lock held on entry/exit;
@@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
*
* note: new iag transaction:
* . synchronously write iag;
- * . write log of xtree and inode of imap;
+ * . write log of xtree and inode of imap;
* . commit;
* . synchronous write of xtree (right to left, bottom to top);
* . at start of logredo(): init in-memory imap with one additional iag page;
@@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
s64 xaddr = 0;
s64 blkno;
tid_t tid;
-#ifdef _STILL_TO_PORT
- xad_t xad;
-#endif /* _STILL_TO_PORT */
struct inode *iplist[1];
/* pick up pointers to the inode map and mount inodes */
@@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
}
/*
- * NAME: diIAGRead()
+ * NAME: diIAGRead()
*
- * FUNCTION: get the buffer for the specified iag within a fileset
+ * FUNCTION: get the buffer for the specified iag within a fileset
* or aggregate inode map.
*
* PARAMETERS:
- * imap - pointer to inode map control structure.
- * iagno - iag number.
- * bpp - point to buffer pointer to be filled in on successful
+ * imap - pointer to inode map control structure.
+ * iagno - iag number.
+ * bpp - point to buffer pointer to be filled in on successful
* exit.
*
* SERIALIZATION:
@@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
* the read lock is unnecessary.)
*
* RETURN VALUES:
- * 0 - success.
- * -EIO - i/o error.
+ * 0 - success.
+ * -EIO - i/o error.
*/
static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
{
@@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
}
/*
- * NAME: diFindFree()
+ * NAME: diFindFree()
*
- * FUNCTION: find the first free bit in a word starting at
+ * FUNCTION: find the first free bit in a word starting at
* the specified bit position.
*
* PARAMETERS:
- * word - word to be examined.
- * start - starting bit position.
+ * word - word to be examined.
+ * start - starting bit position.
*
* RETURN VALUES:
- * bit position of first free bit in the word or 32 if
+ * bit position of first free bit in the word or 32 if
* no free bits were found.
*/
static int diFindFree(u32 word, int start)
@@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
atomic_read(&imap->im_numfree));
/*
- * reconstruct imap
+ * reconstruct imap
*
* coalesce contiguous k (newAGSize/oldAGSize) AGs;
* i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
@@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
}
/*
- * process each iag page of the map.
+ * process each iag page of the map.
*
* rebuild AG Free Inode List, AG Free Inode Extent List;
*/
@@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
/* leave free iag in the free iag list */
if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
- release_metapage(bp);
+ release_metapage(bp);
continue;
}
@@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
}
/*
- * NAME: copy_from_dinode()
+ * NAME: copy_from_dinode()
*
- * FUNCTION: Copies inode info from disk inode to in-memory inode
+ * FUNCTION: Copies inode info from disk inode to in-memory inode
*
* RETURN VALUES:
- * 0 - success
- * -ENOMEM - insufficient memory
+ * 0 - success
+ * -ENOMEM - insufficient memory
*/
static int copy_from_dinode(struct dinode * dip, struct inode *ip)
{
@@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
}
/*
- * NAME: copy_to_dinode()
+ * NAME: copy_to_dinode()
*
- * FUNCTION: Copies inode info from in-memory inode to disk inode
+ * FUNCTION: Copies inode info from in-memory inode to disk inode
*/
static void copy_to_dinode(struct dinode * dip, struct inode *ip)
{
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index 4f9c346ed49..610a0e9d894 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -24,17 +24,17 @@
* jfs_imap.h: disk inode manager
*/
-#define EXTSPERIAG 128 /* number of disk inode extent per iag */
-#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */
-#define SMAPSZ 4 /* number of words per summary map */
+#define EXTSPERIAG 128 /* number of disk inode extent per iag */
+#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */
+#define SMAPSZ 4 /* number of words per summary map */
#define EXTSPERSUM 32 /* number of extents per summary map entry */
#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */
#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */
-#define MAXIAGS ((1<<20)-1) /* maximum number of iags */
-#define MAXAG 128 /* maximum number of allocation groups */
+#define MAXIAGS ((1<<20)-1) /* maximum number of iags */
+#define MAXAG 128 /* maximum number of allocation groups */
-#define AMAPSIZE 512 /* bytes in the IAG allocation maps */
-#define SMAPSIZE 16 /* bytes in the IAG summary maps */
+#define AMAPSIZE 512 /* bytes in the IAG allocation maps */
+#define SMAPSIZE 16 /* bytes in the IAG summary maps */
/* convert inode number to iag number */
#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG)
@@ -60,31 +60,31 @@
* inode allocation group page (per 4096 inodes of an AG)
*/
struct iag {
- __le64 agstart; /* 8: starting block of ag */
- __le32 iagnum; /* 4: inode allocation group number */
- __le32 inofreefwd; /* 4: ag inode free list forward */
- __le32 inofreeback; /* 4: ag inode free list back */
- __le32 extfreefwd; /* 4: ag inode extent free list forward */
- __le32 extfreeback; /* 4: ag inode extent free list back */
- __le32 iagfree; /* 4: iag free list */
+ __le64 agstart; /* 8: starting block of ag */
+ __le32 iagnum; /* 4: inode allocation group number */
+ __le32 inofreefwd; /* 4: ag inode free list forward */
+ __le32 inofreeback; /* 4: ag inode free list back */
+ __le32 extfreefwd; /* 4: ag inode extent free list forward */
+ __le32 extfreeback; /* 4: ag inode extent free list back */
+ __le32 iagfree; /* 4: iag free list */
/* summary map: 1 bit per inode extent */
__le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes;
- * note: this indicates free and backed
- * inodes, if the extent is not backed the
- * value will be 1. if the extent is
- * backed but all inodes are being used the
- * value will be 1. if the extent is
- * backed but at least one of the inodes is
- * free the value will be 0.
+ * note: this indicates free and backed
+ * inodes, if the extent is not backed the
+ * value will be 1. if the extent is
+ * backed but all inodes are being used the
+ * value will be 1. if the extent is
+ * backed but at least one of the inodes is
+ * free the value will be 0.
*/
__le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */
- __le32 nfreeinos; /* 4: number of free inodes */
- __le32 nfreeexts; /* 4: number of free extents */
+ __le32 nfreeinos; /* 4: number of free inodes */
+ __le32 nfreeexts; /* 4: number of free extents */
/* (72) */
u8 pad[1976]; /* 1976: pad to 2048 bytes */
/* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
- __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */
+ __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */
__le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */
pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */
}; /* (4096) */
@@ -93,44 +93,44 @@ struct iag {
* per AG control information (in inode map control page)
*/
struct iagctl_disk {
- __le32 inofree; /* 4: free inode list anchor */
- __le32 extfree; /* 4: free extent list anchor */
- __le32 numinos; /* 4: number of backed inodes */
- __le32 numfree; /* 4: number of free inodes */
+ __le32 inofree; /* 4: free inode list anchor */
+ __le32 extfree; /* 4: free extent list anchor */
+ __le32 numinos; /* 4: number of backed inodes */
+ __le32 numfree; /* 4: number of free inodes */
}; /* (16) */
struct iagctl {
- int inofree; /* free inode list anchor */
- int extfree; /* free extent list anchor */
- int numinos; /* number of backed inodes */
- int numfree; /* number of free inodes */
+ int inofree; /* free inode list anchor */
+ int extfree; /* free extent list anchor */
+ int numinos; /* number of backed inodes */
+ int numfree; /* number of free inodes */
};
/*
* per fileset/aggregate inode map control page
*/
struct dinomap_disk {
- __le32 in_freeiag; /* 4: free iag list anchor */
- __le32 in_nextiag; /* 4: next free iag number */
- __le32 in_numinos; /* 4: num of backed inodes */
+ __le32 in_freeiag; /* 4: free iag list anchor */
+ __le32 in_nextiag; /* 4: next free iag number */
+ __le32 in_numinos; /* 4: num of backed inodes */
__le32 in_numfree; /* 4: num of free backed inodes */
__le32 in_nbperiext; /* 4: num of blocks per inode extent */
- __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */
- __le32 in_diskblock; /* 4: for standalone test driver */
- __le32 in_maxag; /* 4: for standalone test driver */
- u8 pad[2016]; /* 2016: pad to 2048 */
+ __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */
+ __le32 in_diskblock; /* 4: for standalone test driver */
+ __le32 in_maxag; /* 4: for standalone test driver */
+ u8 pad[2016]; /* 2016: pad to 2048 */
struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */
}; /* (4096) */
struct dinomap {
- int in_freeiag; /* free iag list anchor */
- int in_nextiag; /* next free iag number */
- int in_numinos; /* num of backed inodes */
- int in_numfree; /* num of free backed inodes */
+ int in_freeiag; /* free iag list anchor */
+ int in_nextiag; /* next free iag number */
+ int in_numinos; /* num of backed inodes */
+ int in_numfree; /* num of free backed inodes */
int in_nbperiext; /* num of blocks per inode extent */
- int in_l2nbperiext; /* l2 of in_nbperiext */
- int in_diskblock; /* for standalone test driver */
- int in_maxag; /* for standalone test driver */
+ int in_l2nbperiext; /* l2 of in_nbperiext */
+ int in_diskblock; /* for standalone test driver */
+ int in_maxag; /* for standalone test driver */
struct iagctl in_agctl[MAXAG]; /* AG control information */
};
@@ -139,9 +139,9 @@ struct dinomap {
*/
struct inomap {
struct dinomap im_imap; /* 4096: inode allocation control */
- struct inode *im_ipimap; /* 4: ptr to inode for imap */
- struct mutex im_freelock; /* 4: iag free list lock */
- struct mutex im_aglock[MAXAG]; /* 512: per AG locks */
+ struct inode *im_ipimap; /* 4: ptr to inode for imap */
+ struct mutex im_freelock; /* 4: iag free list lock */
+ struct mutex im_aglock[MAXAG]; /* 512: per AG locks */
u32 *im_DBGdimap;
atomic_t im_numinos; /* num of backed inodes */
atomic_t im_numfree; /* num of free backed inodes */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 8f453eff3c8..cb8f30985ad 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -40,7 +40,7 @@ struct jfs_inode_info {
uint mode2; /* jfs-specific mode */
uint saved_uid; /* saved for uid mount option */
uint saved_gid; /* saved for gid mount option */
- pxd_t ixpxd; /* inode extent descriptor */
+ pxd_t ixpxd; /* inode extent descriptor */
dxd_t acl; /* dxd describing acl */
dxd_t ea; /* dxd describing ea */
time_t otime; /* time created */
@@ -190,7 +190,7 @@ struct jfs_sb_info {
uint gengen; /* inode generation generator*/
uint inostamp; /* shows inode belongs to fileset*/
- /* Formerly in ipbmap */
+ /* Formerly in ipbmap */
struct bmap *bmap; /* incore bmap descriptor */
struct nls_table *nls_tab; /* current codepage */
struct inode *direct_inode; /* metadata inode */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 44a2f33cb98..de3e4a506db 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
goto writeRecord;
/*
- * initialize/update page/transaction recovery lsn
+ * initialize/update page/transaction recovery lsn
*/
lsn = log->lsn;
@@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * initialize/update lsn of tblock of the page
+ * initialize/update lsn of tblock of the page
*
* transaction inherits oldest lsn of pages associated
* with allocation/deallocation of resources (their
@@ -307,7 +307,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
LOGSYNC_UNLOCK(log, flags);
/*
- * write the log record
+ * write the log record
*/
writeRecord:
lsn = lmWriteRecord(log, tblk, lrd, tlck);
@@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
goto moveLrd;
/*
- * move log record data
+ * move log record data
*/
/* retrieve source meta-data page to log */
if (tlck->flag & tlckPAGELOCK) {
@@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * move log record descriptor
+ * move log record descriptor
*/
moveLrd:
lrd->length = cpu_to_le16(len);
@@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log)
LOGGC_LOCK(log);
/*
- * write or queue the full page at the tail of write queue
+ * write or queue the full page at the tail of write queue
*/
/* get the tail tblk on commit queue */
if (list_empty(&log->cqueue))
@@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log)
LOGGC_UNLOCK(log);
/*
- * allocate/initialize next page
+ * allocate/initialize next page
*/
/* if log wraps, the first data page of log is 2
* (0 never used, 1 is superblock).
@@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
}
/*
- * forward syncpt
+ * forward syncpt
*/
/* if last sync is same as last syncpt,
* invoke sync point forward processing to update sync.
@@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
lsn = log->lsn;
/*
- * setup next syncpt trigger (SWAG)
+ * setup next syncpt trigger (SWAG)
*/
logsize = log->logsize;
@@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
if (more < 2 * LOGPSIZE) {
jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
/*
- * log wrapping
+ * log wrapping
*
* option 1 - panic ? No.!
* option 2 - shutdown file systems
- * associated with log ?
+ * associated with log ?
* option 3 - extend log ?
*/
/*
@@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
/*
* NAME: lmLogOpen()
*
- * FUNCTION: open the log on first open;
+ * FUNCTION: open the log on first open;
* insert filesystem in the active list of the log.
*
* PARAMETER: ipmnt - file system mount inode
@@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb)
init_waitqueue_head(&log->syncwait);
/*
- * external log as separate logical volume
+ * external log as separate logical volume
*
* file systems to log may have n-to-1 relationship;
*/
@@ -1155,7 +1155,7 @@ journal_found:
return 0;
/*
- * unwind on error
+ * unwind on error
*/
shutdown: /* unwind lbmLogInit() */
list_del(&log->journal_list);
@@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log)
return 0;
/*
- * unwind on error
+ * unwind on error
*/
errout30: /* release log page */
log->wqueue = NULL;
@@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb)
if (test_bit(log_INLINELOG, &log->flag)) {
/*
- * in-line log in host file system
+ * in-line log in host file system
*/
rc = lmLogShutdown(log);
kfree(log);
@@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb)
goto out;
/*
- * external log as separate logical volume
+ * external log as separate logical volume
*/
list_del(&log->journal_list);
bdev = log->bdev;
@@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
if (!list_empty(&log->synclist)) {
struct logsyncblk *lp;
+ printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
list_for_each_entry(lp, &log->synclist, synclist) {
if (lp->xflag & COMMIT_PAGE) {
struct metapage *mp = (struct metapage *)lp;
- dump_mem("orphan metapage", lp,
- sizeof(struct metapage));
- dump_mem("page", mp->page, sizeof(struct page));
- }
- else
- dump_mem("orphan tblock", lp,
- sizeof(struct tblock));
+ print_hex_dump(KERN_ERR, "metapage: ",
+ DUMP_PREFIX_ADDRESS, 16, 4,
+ mp, sizeof(struct metapage), 0);
+ print_hex_dump(KERN_ERR, "page: ",
+ DUMP_PREFIX_ADDRESS, 16,
+ sizeof(long), mp->page,
+ sizeof(struct page), 0);
+ } else
+ print_hex_dump(KERN_ERR, "tblock:",
+ DUMP_PREFIX_ADDRESS, 16, 4,
+ lp, sizeof(struct tblock), 0);
}
}
+#else
+ WARN_ON(!list_empty(&log->synclist));
#endif
- //assert(list_empty(&log->synclist));
clear_bit(log_FLUSH, &log->flag);
}
@@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log)
*
* PARAMETE: log - pointer to logs inode.
* fsdev - kdev_t of filesystem.
- * serial - pointer to returned log serial number
+ * serial - pointer to returned log serial number
* activate - insert/remove device from active list.
*
* RETURN: 0 - success
@@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp)
* FUNCTION: add a log buffer to the log redrive list
*
* PARAMETER:
- * bp - log buffer
+ * bp - log buffer
*
* NOTES:
* Takes log_redrive_lock.
@@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
bp->l_flag = flag;
/*
- * insert bp at tail of write queue associated with log
+ * insert bp at tail of write queue associated with log
*
* (request is either for bp already/currently at head of queue
* or new bp to be inserted at tail)
@@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
/*
- * initiate pageout of the page
+ * initiate pageout of the page
*/
lbmStartIO(bp);
}
@@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
*
* FUNCTION: Interface to DD strategy routine
*
- * RETURN: none
+ * RETURN: none
*
* serialization: LCACHE_LOCK() is NOT held during log i/o;
*/
@@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
bio_put(bio);
/*
- * pagein completion
+ * pagein completion
*/
if (bp->l_flag & lbmREAD) {
bp->l_flag &= ~lbmREAD;
@@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
}
/*
- * pageout completion
+ * pageout completion
*
* the bp at the head of write queue has completed pageout.
*
@@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
}
/*
- * synchronous pageout:
+ * synchronous pageout:
*
* buffer has not necessarily been removed from write queue
* (e.g., synchronous write of partial-page with COMMIT):
@@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
}
/*
- * Group Commit pageout:
+ * Group Commit pageout:
*/
else if (bp->l_flag & lbmGC) {
LCACHE_UNLOCK(flags);
@@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
}
/*
- * asynchronous pageout:
+ * asynchronous pageout:
*
* buffer must have been removed from write queue:
* insert buffer at head of freelist where it can be recycled
@@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg)
* FUNCTION: format file system log
*
* PARAMETERS:
- * log - volume log
+ * log - volume log
* logAddress - start address of log space in FS block
* logSize - length of log space in FS block;
*
@@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
npages = logSize >> sbi->l2nbperpage;
/*
- * log space:
+ * log space:
*
* page 0 - reserved;
* page 1 - log superblock;
* page 2 - log data page: A SYNC log record is written
- * into this page at logform time;
+ * into this page at logform time;
* pages 3-N - log data page: set to empty log data pages;
*/
/*
- * init log superblock: log page 1
+ * init log superblock: log page 1
*/
logsuper = (struct logsuper *) bp->l_ldata;
@@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
goto exit;
/*
- * init pages 2 to npages-1 as log data pages:
+ * init pages 2 to npages-1 as log data pages:
*
* log page sequence number (lpsn) initialization:
*
@@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
goto exit;
/*
- * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
+ * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
*/
for (lspn = 0; lspn < npages - 3; lspn++) {
lp->h.page = lp->t.page = cpu_to_le32(lspn);
@@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
rc = 0;
exit:
/*
- * finalize log
+ * finalize log
*/
/* release the buffer */
lbmFree(bp);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index a53fb17ea21..1f85ef0ec04 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -144,7 +144,7 @@ struct logpage {
*
* (this comment should be rewritten !)
* jfs uses only "after" log records (only a single writer is allowed
- * in a page, pages are written to temporary paging space if
+ * in a page, pages are written to temporary paging space if
* if they must be written to disk before commit, and i/o is
* scheduled for modified pages to their home location after
* the log records containing the after values and the commit
@@ -153,7 +153,7 @@ struct logpage {
*
* a log record consists of a data area of variable length followed by
* a descriptor of fixed size LOGRDSIZE bytes.
- * the data area is rounded up to an integral number of 4-bytes and
+ * the data area is rounded up to an integral number of 4-bytes and
* must be no longer than LOGPSIZE.
* the descriptor is of size of multiple of 4-bytes and aligned on a
* 4-byte boundary.
@@ -215,13 +215,13 @@ struct lrd {
union {
/*
- * COMMIT: commit
+ * COMMIT: commit
*
* transaction commit: no type-dependent information;
*/
/*
- * REDOPAGE: after-image
+ * REDOPAGE: after-image
*
* apply after-image;
*
@@ -236,7 +236,7 @@ struct lrd {
} redopage; /* (20) */
/*
- * NOREDOPAGE: the page is freed
+ * NOREDOPAGE: the page is freed
*
* do not apply after-image records which precede this record
* in the log with the same page block number to this page.
@@ -252,7 +252,7 @@ struct lrd {
} noredopage; /* (20) */
/*
- * UPDATEMAP: update block allocation map
+ * UPDATEMAP: update block allocation map
*
* either in-line PXD,
* or out-of-line XADLIST;
@@ -268,7 +268,7 @@ struct lrd {
} updatemap; /* (20) */
/*
- * NOREDOINOEXT: the inode extent is freed
+ * NOREDOINOEXT: the inode extent is freed
*
* do not apply after-image records which precede this
* record in the log with the any of the 4 page block
@@ -286,7 +286,7 @@ struct lrd {
} noredoinoext; /* (20) */
/*
- * SYNCPT: log sync point
+ * SYNCPT: log sync point
*
* replay log upto syncpt address specified;
*/
@@ -295,13 +295,13 @@ struct lrd {
} syncpt;
/*
- * MOUNT: file system mount
+ * MOUNT: file system mount
*
* file system mount: no type-dependent information;
*/
/*
- * ? FREEXTENT: free specified extent(s)
+ * ? FREEXTENT: free specified extent(s)
*
* free specified extent(s) from block allocation map
* N.B.: nextents should be length of data/sizeof(xad_t)
@@ -314,7 +314,7 @@ struct lrd {
} freextent;
/*
- * ? NOREDOFILE: this file is freed
+ * ? NOREDOFILE: this file is freed
*
* do not apply records which precede this record in the log
* with the same inode number.
@@ -330,7 +330,7 @@ struct lrd {
} noredofile;
/*
- * ? NEWPAGE:
+ * ? NEWPAGE:
*
* metadata type dependent
*/
@@ -342,7 +342,7 @@ struct lrd {
} newpage;
/*
- * ? DUMMY: filler
+ * ? DUMMY: filler
*
* no type-dependent information
*/
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 43d4f69afbe..77c7f1129dd 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -472,7 +472,8 @@ add_failed:
printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
goto skip;
dump_bio:
- dump_mem("bio", bio, sizeof(*bio));
+ print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
+ 4, bio, sizeof(*bio), 0);
skip:
bio_put(bio);
unlock_page(page);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 4dd47983489..644429acb8c 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb);
*/
int jfs_mount(struct super_block *sb)
{
- int rc = 0; /* Return code */
+ int rc = 0; /* Return code */
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct inode *ipaimap = NULL;
struct inode *ipaimap2 = NULL;
@@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb)
sbi->ipaimap2 = NULL;
/*
- * mount (the only/single) fileset
+ * mount (the only/single) fileset
*/
/*
* open fileset inode allocation map (aka fileset inode)
@@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb)
goto out;
/*
- * unwind on error
+ * unwind on error
*/
errout41: /* close fileset inode allocation map inode */
diFreeSpecial(ipimap);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 25430d0b0d5..7aa1f7004ea 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -18,7 +18,7 @@
*/
/*
- * jfs_txnmgr.c: transaction manager
+ * jfs_txnmgr.c: transaction manager
*
* notes:
* transaction starts with txBegin() and ends with txCommit()
@@ -60,7 +60,7 @@
#include "jfs_debug.h"
/*
- * transaction management structures
+ * transaction management structures
*/
static struct {
int freetid; /* index of a free tid structure */
@@ -103,19 +103,19 @@ module_param(nTxLock, int, 0);
MODULE_PARM_DESC(nTxLock,
"Number of transaction locks (max:65536)");
-struct tblock *TxBlock; /* transaction block table */
-static int TxLockLWM; /* Low water mark for number of txLocks used */
-static int TxLockHWM; /* High water mark for number of txLocks used */
-static int TxLockVHWM; /* Very High water mark */
-struct tlock *TxLock; /* transaction lock table */
+struct tblock *TxBlock; /* transaction block table */
+static int TxLockLWM; /* Low water mark for number of txLocks used */
+static int TxLockHWM; /* High water mark for number of txLocks used */
+static int TxLockVHWM; /* Very High water mark */
+struct tlock *TxLock; /* transaction lock table */
/*
- * transaction management lock
+ * transaction management lock
*/
static DEFINE_SPINLOCK(jfsTxnLock);
-#define TXN_LOCK() spin_lock(&jfsTxnLock)
-#define TXN_UNLOCK() spin_unlock(&jfsTxnLock)
+#define TXN_LOCK() spin_lock(&jfsTxnLock)
+#define TXN_UNLOCK() spin_unlock(&jfsTxnLock)
#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock);
#define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags)
@@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
#define TXN_WAKEUP(event) wake_up_all(event)
/*
- * statistics
+ * statistics
*/
static struct {
tid_t maxtid; /* 4: biggest tid ever used */
@@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
static void LogSyncRelease(struct metapage * mp);
/*
- * transaction block/lock management
- * ---------------------------------
+ * transaction block/lock management
+ * ---------------------------------
*/
/*
@@ -227,9 +227,9 @@ static void txLockFree(lid_t lid)
}
/*
- * NAME: txInit()
+ * NAME: txInit()
*
- * FUNCTION: initialize transaction management structures
+ * FUNCTION: initialize transaction management structures
*
* RETURN:
*
@@ -333,9 +333,9 @@ int txInit(void)
}
/*
- * NAME: txExit()
+ * NAME: txExit()
*
- * FUNCTION: clean up when module is unloaded
+ * FUNCTION: clean up when module is unloaded
*/
void txExit(void)
{
@@ -346,12 +346,12 @@ void txExit(void)
}
/*
- * NAME: txBegin()
+ * NAME: txBegin()
*
- * FUNCTION: start a transaction.
+ * FUNCTION: start a transaction.
*
- * PARAMETER: sb - superblock
- * flag - force for nested tx;
+ * PARAMETER: sb - superblock
+ * flag - force for nested tx;
*
* RETURN: tid - transaction id
*
@@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag)
}
/*
- * NAME: txBeginAnon()
+ * NAME: txBeginAnon()
*
- * FUNCTION: start an anonymous transaction.
+ * FUNCTION: start an anonymous transaction.
* Blocks if logsync or available tlocks are low to prevent
* anonymous tlocks from depleting supply.
*
- * PARAMETER: sb - superblock
+ * PARAMETER: sb - superblock
*
* RETURN: none
*/
@@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb)
}
/*
- * txEnd()
+ * txEnd()
*
* function: free specified transaction block.
*
- * logsync barrier processing:
+ * logsync barrier processing:
*
* serialization:
*/
@@ -577,13 +577,13 @@ wakeup:
}
/*
- * txLock()
+ * txLock()
*
* function: acquire a transaction lock on the specified <mp>
*
* parameter:
*
- * return: transaction lock id
+ * return: transaction lock id
*
* serialization:
*/
@@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
/* Only locks on ipimap or ipaimap should reach here */
/* assert(jfs_ip->fileset == AGGREGATE_I); */
if (jfs_ip->fileset != AGGREGATE_I) {
- jfs_err("txLock: trying to lock locked page!");
- dump_mem("ip", ip, sizeof(struct inode));
- dump_mem("mp", mp, sizeof(struct metapage));
- dump_mem("Locker's tblk", tid_to_tblock(tid),
- sizeof(struct tblock));
- dump_mem("Tlock", tlck, sizeof(struct tlock));
+ printk(KERN_ERR "txLock: trying to lock locked page!");
+ print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ ip, sizeof(*ip), 0);
+ print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ mp, sizeof(*mp), 0);
+ print_hex_dump(KERN_ERR, "Locker's tblock: ",
+ DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
+ sizeof(struct tblock), 0);
+ print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ tlck, sizeof(*tlck), 0);
BUG();
}
INCREMENT(stattx.waitlock); /* statistics */
@@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
}
/*
- * NAME: txRelease()
+ * NAME: txRelease()
*
- * FUNCTION: Release buffers associated with transaction locks, but don't
+ * FUNCTION: Release buffers associated with transaction locks, but don't
* mark homeok yet. The allows other transactions to modify
* buffers, but won't let them go to disk until commit record
* actually gets written.
*
* PARAMETER:
- * tblk -
+ * tblk -
*
- * RETURN: Errors from subroutines.
+ * RETURN: Errors from subroutines.
*/
static void txRelease(struct tblock * tblk)
{
@@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk)
}
/*
- * NAME: txUnlock()
+ * NAME: txUnlock()
*
- * FUNCTION: Initiates pageout of pages modified by tid in journalled
- * objects and frees their lockwords.
+ * FUNCTION: Initiates pageout of pages modified by tid in journalled
+ * objects and frees their lockwords.
*/
static void txUnlock(struct tblock * tblk)
{
@@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk)
}
/*
- * txMaplock()
+ * txMaplock()
*
* function: allocate a transaction lock for freed page/entry;
- * for freed page, maplock is used as xtlock/dtlock type;
+ * for freed page, maplock is used as xtlock/dtlock type;
*/
struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
{
@@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
}
/*
- * txLinelock()
+ * txLinelock()
*
* function: allocate a transaction lock for log vector list
*/
@@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock)
}
/*
- * transaction commit management
- * -----------------------------
+ * transaction commit management
+ * -----------------------------
*/
/*
- * NAME: txCommit()
- *
- * FUNCTION: commit the changes to the objects specified in
- * clist. For journalled segments only the
- * changes of the caller are committed, ie by tid.
- * for non-journalled segments the data are flushed to
- * disk and then the change to the disk inode and indirect
- * blocks committed (so blocks newly allocated to the
- * segment will be made a part of the segment atomically).
- *
- * all of the segments specified in clist must be in
- * one file system. no more than 6 segments are needed
- * to handle all unix svcs.
- *
- * if the i_nlink field (i.e. disk inode link count)
- * is zero, and the type of inode is a regular file or
- * directory, or symbolic link , the inode is truncated
- * to zero length. the truncation is committed but the
- * VM resources are unaffected until it is closed (see
- * iput and iclose).
+ * NAME: txCommit()
+ *
+ * FUNCTION: commit the changes to the objects specified in
+ * clist. For journalled segments only the
+ * changes of the caller are committed, ie by tid.
+ * for non-journalled segments the data are flushed to
+ * disk and then the change to the disk inode and indirect
+ * blocks committed (so blocks newly allocated to the
+ * segment will be made a part of the segment atomically).
+ *
+ * all of the segments specified in clist must be in
+ * one file system. no more than 6 segments are needed
+ * to handle all unix svcs.
+ *
+ * if the i_nlink field (i.e. disk inode link count)
+ * is zero, and the type of inode is a regular file or
+ * directory, or symbolic link , the inode is truncated
+ * to zero length. the truncation is committed but the
+ * VM resources are unaffected until it is closed (see
+ * iput and iclose).
*
* PARAMETER:
*
* RETURN:
*
* serialization:
- * on entry the inode lock on each segment is assumed
- * to be held.
+ * on entry the inode lock on each segment is assumed
+ * to be held.
*
* i/o error:
*/
@@ -1175,7 +1179,7 @@ int txCommit(tid_t tid, /* transaction identifier */
if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
tblk->xflag |= COMMIT_LAZY;
/*
- * prepare non-journaled objects for commit
+ * prepare non-journaled objects for commit
*
* flush data pages of non-journaled file
* to prevent the file getting non-initialized disk blocks
@@ -1186,7 +1190,7 @@ int txCommit(tid_t tid, /* transaction identifier */
cd.nip = nip;
/*
- * acquire transaction lock on (on-disk) inodes
+ * acquire transaction lock on (on-disk) inodes
*
* update on-disk inode from in-memory inode
* acquiring transaction locks for AFTER records
@@ -1262,7 +1266,7 @@ int txCommit(tid_t tid, /* transaction identifier */
}
/*
- * write log records from transaction locks
+ * write log records from transaction locks
*
* txUpdateMap() resets XAD_NEW in XAD.
*/
@@ -1294,7 +1298,7 @@ int txCommit(tid_t tid, /* transaction identifier */
!test_cflag(COMMIT_Nolink, tblk->u.ip)));
/*
- * write COMMIT log record
+ * write COMMIT log record
*/
lrd->type = cpu_to_le16(LOG_COMMIT);
lrd->length = 0;
@@ -1303,7 +1307,7 @@ int txCommit(tid_t tid, /* transaction identifier */
lmGroupCommit(log, tblk);
/*
- * - transaction is now committed -
+ * - transaction is now committed -
*/
/*
@@ -1314,11 +1318,11 @@ int txCommit(tid_t tid, /* transaction identifier */
txForce(tblk);
/*
- * update allocation map.
+ * update allocation map.
*
* update inode allocation map and inode:
* free pager lock on memory object of inode if any.
- * update block allocation map.
+ * update block allocation map.
*
* txUpdateMap() resets XAD_NEW in XAD.
*/
@@ -1326,7 +1330,7 @@ int txCommit(tid_t tid, /* transaction identifier */
txUpdateMap(tblk);
/*
- * free transaction locks and pageout/free pages
+ * free transaction locks and pageout/free pages
*/
txRelease(tblk);
@@ -1335,7 +1339,7 @@ int txCommit(tid_t tid, /* transaction identifier */
/*
- * reset in-memory object state
+ * reset in-memory object state
*/
for (k = 0; k < cd.nip; k++) {
ip = cd.iplist[k];
@@ -1358,11 +1362,11 @@ int txCommit(tid_t tid, /* transaction identifier */
}
/*
- * NAME: txLog()
+ * NAME: txLog()
*
- * FUNCTION: Writes AFTER log records for all lines modified
- * by tid for segments specified by inodes in comdata.
- * Code assumes only WRITELOCKS are recorded in lockwords.
+ * FUNCTION: Writes AFTER log records for all lines modified
+ * by tid for segments specified by inodes in comdata.
+ * Code assumes only WRITELOCKS are recorded in lockwords.
*
* PARAMETERS:
*
@@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
}
/*
- * diLog()
+ * diLog()
*
- * function: log inode tlock and format maplock to update bmap;
+ * function: log inode tlock and format maplock to update bmap;
*/
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
- struct tlock * tlck, struct commit * cd)
+ struct tlock * tlck, struct commit * cd)
{
int rc = 0;
struct metapage *mp;
@@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
pxd = &lrd->log.redopage.pxd;
/*
- * inode after image
+ * inode after image
*/
if (tlck->type & tlckENTRY) {
/* log after-image for logredo(): */
@@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
tlck->flag |= tlckWRITEPAGE;
} else if (tlck->type & tlckFREE) {
/*
- * free inode extent
+ * free inode extent
*
* (pages of the freed inode extent have been invalidated and
* a maplock for free of the extent has been formatted at
@@ -1498,7 +1502,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
jfs_err("diLog: UFO type tlck:0x%p", tlck);
#ifdef _JFS_WIP
/*
- * alloc/free external EA extent
+ * alloc/free external EA extent
*
* a maplock for txUpdateMap() to update bPWMAP for alloc/free
* of the extent has been formatted at txLock() time;
@@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * dataLog()
+ * dataLog()
*
- * function: log data tlock
+ * function: log data tlock
*/
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
struct tlock * tlck)
@@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * dtLog()
+ * dtLog()
*
- * function: log dtree tlock and format maplock to update bmap;
+ * function: log dtree tlock and format maplock to update bmap;
*/
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
struct tlock * tlck)
@@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
/*
- * page extension via relocation: entry insertion;
- * page extension in-place: entry insertion;
- * new right page from page split, reinitialized in-line
- * root from root page split: entry insertion;
+ * page extension via relocation: entry insertion;
+ * page extension in-place: entry insertion;
+ * new right page from page split, reinitialized in-line
+ * root from root page split: entry insertion;
*/
if (tlck->type & (tlckNEW | tlckEXTEND)) {
/* log after-image of the new page for logredo():
@@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * entry insertion/deletion,
- * sibling page link update (old right page before split);
+ * entry insertion/deletion,
+ * sibling page link update (old right page before split);
*/
if (tlck->type & (tlckENTRY | tlckRELINK)) {
/* log after-image for logredo(): */
@@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * page deletion: page has been invalidated
- * page relocation: source extent
+ * page deletion: page has been invalidated
+ * page relocation: source extent
*
- * a maplock for free of the page has been formatted
- * at txLock() time);
+ * a maplock for free of the page has been formatted
+ * at txLock() time);
*/
if (tlck->type & (tlckFREE | tlckRELOCATE)) {
/* log LOG_NOREDOPAGE of the deleted page for logredo()
@@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * xtLog()
+ * xtLog()
*
- * function: log xtree tlock and format maplock to update bmap;
+ * function: log xtree tlock and format maplock to update bmap;
*/
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
struct tlock * tlck)
@@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
xadlock = (struct xdlistlock *) maplock;
/*
- * entry insertion/extension;
- * sibling page link update (old right page before split);
+ * entry insertion/extension;
+ * sibling page link update (old right page before split);
*/
if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
/* log after-image for logredo():
@@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * page deletion: file deletion/truncation (ref. xtTruncate())
+ * page deletion: file deletion/truncation (ref. xtTruncate())
*
* (page will be invalidated after log is written and bmap
* is updated from the page);
@@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * page/entry truncation: file truncation (ref. xtTruncate())
+ * page/entry truncation: file truncation (ref. xtTruncate())
*
- * |----------+------+------+---------------|
- * | | |
- * | | hwm - hwm before truncation
- * | next - truncation point
- * lwm - lwm before truncation
+ * |----------+------+------+---------------|
+ * | | |
+ * | | hwm - hwm before truncation
+ * | next - truncation point
+ * lwm - lwm before truncation
* header ?
*/
if (tlck->type & tlckTRUNCATE) {
@@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
twm = xtlck->twm.offset;
/*
- * write log records
+ * write log records
*/
/* log after-image for logredo():
*
@@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * format maplock(s) for txUpdateMap() to update bmap
+ * format maplock(s) for txUpdateMap() to update bmap
*/
maplock->index = 0;
@@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * mapLog()
+ * mapLog()
*
- * function: log from maplock of freed data extents;
+ * function: log from maplock of freed data extents;
*/
static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
struct tlock * tlck)
@@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
pxd_t *pxd;
/*
- * page relocation: free the source page extent
+ * page relocation: free the source page extent
*
* a maplock for txUpdateMap() for free of the page
* has been formatted at txLock() time saving the src
@@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
/*
- * txEA()
+ * txEA()
*
- * function: acquire maplock for EA/ACL extents or
- * set COMMIT_INLINE flag;
+ * function: acquire maplock for EA/ACL extents or
+ * set COMMIT_INLINE flag;
*/
void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
{
@@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
}
/*
- * txForce()
+ * txForce()
*
* function: synchronously write pages locked by transaction
- * after txLog() but before txUpdateMap();
+ * after txLog() but before txUpdateMap();
*/
static void txForce(struct tblock * tblk)
{
@@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk)
}
/*
- * txUpdateMap()
+ * txUpdateMap()
*
- * function: update persistent allocation map (and working map
- * if appropriate);
+ * function: update persistent allocation map (and working map
+ * if appropriate);
*
* parameter:
*/
@@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk)
/*
- * update block allocation map
+ * update block allocation map
*
* update allocation state in pmap (and wmap) and
* update lsn of the pmap page;
@@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk)
}
}
/*
- * update inode allocation map
+ * update inode allocation map
*
* update allocation state in pmap and
* update lsn of the pmap page;
@@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk)
}
/*
- * txAllocPMap()
+ * txAllocPMap()
*
* function: allocate from persistent map;
*
* parameter:
- * ipbmap -
- * malock -
- * xad list:
- * pxd:
- *
- * maptype -
- * allocate from persistent map;
- * free from persistent map;
- * (e.g., tmp file - free from working map at releae
- * of last reference);
- * free from persistent and working map;
- *
- * lsn - log sequence number;
+ * ipbmap -
+ * malock -
+ * xad list:
+ * pxd:
+ *
+ * maptype -
+ * allocate from persistent map;
+ * free from persistent map;
+ * (e.g., tmp file - free from working map at releae
+ * of last reference);
+ * free from persistent and working map;
+ *
+ * lsn - log sequence number;
*/
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
struct tblock * tblk)
@@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
}
/*
- * txFreeMap()
+ * txFreeMap()
*
- * function: free from persistent and/or working map;
+ * function: free from persistent and/or working map;
*
* todo: optimization
*/
@@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip,
}
/*
- * txFreelock()
+ * txFreelock()
*
- * function: remove tlock from inode anonymous locklist
+ * function: remove tlock from inode anonymous locklist
*/
void txFreelock(struct inode *ip)
{
@@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip)
}
/*
- * txAbort()
+ * txAbort()
*
* function: abort tx before commit;
*
@@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty)
}
/*
- * txLazyCommit(void)
+ * txLazyCommit(void)
*
* All transactions except those changing ipimap (COMMIT_FORCE) are
* processed by this routine. This insures that the inode and block
@@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk)
}
/*
- * jfs_lazycommit(void)
+ * jfs_lazycommit(void)
*
* To be run as a kernel daemon. If lbmIODone is called in an interrupt
* context, or where blocking is not wanted, this routine will process
@@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb)
}
/*
- * jfs_sync(void)
+ * jfs_sync(void)
*
* To be run as a kernel daemon. This is awakened when tlocks run low.
* We write any inodes that have anonymous tlocks so they will become
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 7863cf21afc..ab728893701 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -94,7 +94,7 @@ extern struct tblock *TxBlock; /* transaction block table */
*/
struct tlock {
lid_t next; /* 2: index next lockword on tid locklist
- * next lockword on freelist
+ * next lockword on freelist
*/
tid_t tid; /* 2: transaction id holding lock */
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 09b25295868..649f9817acc 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -21,7 +21,7 @@
/*
* jfs_types.h:
*
- * basic type/utility definitions
+ * basic type/utility definitions
*
* note: this header file must be the 1st include file
* of JFS include list in all JFS .c file.
@@ -54,8 +54,8 @@ struct timestruc_t {
*/
#define LEFTMOSTONE 0x80000000
-#define HIGHORDER 0x80000000u /* high order bit on */
-#define ONES 0xffffffffu /* all bit on */
+#define HIGHORDER 0x80000000u /* high order bit on */
+#define ONES 0xffffffffu /* all bit on */
/*
* logical xd (lxd)
@@ -148,7 +148,7 @@ typedef struct {
#define sizeDXD(dxd) le32_to_cpu((dxd)->size)
/*
- * directory entry argument
+ * directory entry argument
*/
struct component_name {
int namlen;
@@ -160,14 +160,14 @@ struct component_name {
* DASD limit information - stored in directory inode
*/
struct dasd {
- u8 thresh; /* Alert Threshold (in percent) */
- u8 delta; /* Alert Threshold delta (in percent) */
+ u8 thresh; /* Alert Threshold (in percent) */
+ u8 delta; /* Alert Threshold delta (in percent) */
u8 rsrvd1;
- u8 limit_hi; /* DASD limit (in logical blocks) */
- __le32 limit_lo; /* DASD limit (in logical blocks) */
+ u8 limit_hi; /* DASD limit (in logical blocks) */
+ __le32 limit_lo; /* DASD limit (in logical blocks) */
u8 rsrvd2[3];
- u8 used_hi; /* DASD usage (in logical blocks) */
- __le32 used_lo; /* DASD usage (in logical blocks) */
+ u8 used_hi; /* DASD usage (in logical blocks) */
+ __le32 used_lo; /* DASD usage (in logical blocks) */
};
#define DASDLIMIT(dasdp) \
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index a386f48c73f..7971f37534a 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb)
jfs_info("UnMount JFS: sb:0x%p", sb);
/*
- * update superblock and close log
+ * update superblock and close log
*
* if mounted read-write and log based recovery was enabled
*/
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index acc97c46d8a..1543906a2e0 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -16,7 +16,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
- * jfs_xtree.c: extent allocation descriptor B+-tree manager
+ * jfs_xtree.c: extent allocation descriptor B+-tree manager
*/
#include <linux/fs.h>
@@ -32,30 +32,30 @@
/*
* xtree local flag
*/
-#define XT_INSERT 0x00000001
+#define XT_INSERT 0x00000001
/*
- * xtree key/entry comparison: extent offset
+ * xtree key/entry comparison: extent offset
*
* return:
- * -1: k < start of extent
- * 0: start_of_extent <= k <= end_of_extent
- * 1: k > end_of_extent
+ * -1: k < start of extent
+ * 0: start_of_extent <= k <= end_of_extent
+ * 1: k > end_of_extent
*/
#define XT_CMP(CMP, K, X, OFFSET64)\
{\
- OFFSET64 = offsetXAD(X);\
- (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
- ((K) < OFFSET64) ? -1 : 0;\
+ OFFSET64 = offsetXAD(X);\
+ (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
+ ((K) < OFFSET64) ? -1 : 0;\
}
/* write a xad entry */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
{\
- (XAD)->flag = (FLAG);\
- XADoffset((XAD), (OFF));\
- XADlength((XAD), (LEN));\
- XADaddress((XAD), (ADDR));\
+ (XAD)->flag = (FLAG);\
+ XADoffset((XAD), (OFF));\
+ XADlength((XAD), (LEN));\
+ XADaddress((XAD), (ADDR));\
}
#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
@@ -76,13 +76,13 @@
MP = NULL;\
RC = -EIO;\
}\
- }\
+ }\
}
/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
-#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
+#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
/* xtree entry parameter descriptor */
struct xtsplit {
@@ -97,7 +97,7 @@ struct xtsplit {
/*
- * statistics
+ * statistics
*/
#ifdef CONFIG_JFS_STATISTICS
static struct {
@@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
#endif /* _STILL_TO_PORT */
/*
- * xtLookup()
+ * xtLookup()
*
* function: map a single page into a physical extent;
*/
@@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart,
}
/*
- * compute the physical extent covering logical extent
+ * compute the physical extent covering logical extent
*
* N.B. search may have failed (e.g., hole in sparse file),
* and returned the index of the next entry.
@@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart,
/*
- * xtLookupList()
+ * xtLookupList()
*
* function: map a single logical extent into a list of physical extent;
*
* parameter:
- * struct inode *ip,
- * struct lxdlist *lxdlist, lxd list (in)
- * struct xadlist *xadlist, xad list (in/out)
- * int flag)
+ * struct inode *ip,
+ * struct lxdlist *lxdlist, lxd list (in)
+ * struct xadlist *xadlist, xad list (in/out)
+ * int flag)
*
* coverage of lxd by xad under assumption of
* . lxd's are ordered and disjoint.
* . xad's are ordered and disjoint.
*
* return:
- * 0: success
+ * 0: success
*
* note: a page being written (even a single byte) is backed fully,
- * except the last page which is only backed with blocks
- * required to cover the last byte;
- * the extent backing a page is fully contained within an xad;
+ * except the last page which is only backed with blocks
+ * required to cover the last byte;
+ * the extent backing a page is fully contained within an xad;
*/
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
struct xadlist * xadlist, int flag)
@@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
return rc;
/*
- * compute the physical extent covering logical extent
+ * compute the physical extent covering logical extent
*
* N.B. search may have failed (e.g., hole in sparse file),
* and returned the index of the next entry.
@@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
if (lstart >= size)
goto mapend;
- /* compare with the current xad */
+ /* compare with the current xad */
goto compare1;
}
/* lxd is covered by xad */
@@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
/*
* lxd is partially covered by xad
*/
- else { /* (xend < lend) */
+ else { /* (xend < lend) */
/*
* get next xad
@@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
/*
- * xtSearch()
+ * xtSearch()
*
- * function: search for the xad entry covering specified offset.
+ * function: search for the xad entry covering specified offset.
*
* parameters:
- * ip - file object;
- * xoff - extent offset;
- * nextp - address of next extent (if any) for search miss
- * cmpp - comparison result:
- * btstack - traverse stack;
- * flag - search process flag (XT_INSERT);
+ * ip - file object;
+ * xoff - extent offset;
+ * nextp - address of next extent (if any) for search miss
+ * cmpp - comparison result:
+ * btstack - traverse stack;
+ * flag - search process flag (XT_INSERT);
*
* returns:
- * btstack contains (bn, index) of search path traversed to the entry.
- * *cmpp is set to result of comparison with the entry returned.
- * the page containing the entry is pinned at exit.
+ * btstack contains (bn, index) of search path traversed to the entry.
+ * *cmpp is set to result of comparison with the entry returned.
+ * the page containing the entry is pinned at exit.
*/
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
int *cmpp, struct btstack * btstack, int flag)
@@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
btstack->nsplit = 0;
/*
- * search down tree from root:
+ * search down tree from root:
*
* between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
* internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
XT_CMP(cmp, xoff, &p->xad[index], t64);
if (cmp == 0) {
/*
- * search hit
+ * search hit
*/
/* search hit - leaf page:
* return the entry found
@@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
}
/*
- * search miss
+ * search miss
*
* base is the smallest index with key (Kj) greater than
* search key (K) and may be zero or maxentry index.
@@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
}
/*
- * xtInsert()
+ * xtInsert()
*
* function:
*
* parameter:
- * tid - transaction id;
- * ip - file object;
- * xflag - extent flag (XAD_NOTRECORDED):
- * xoff - extent offset;
- * xlen - extent length;
- * xaddrp - extent address pointer (in/out):
- * if (*xaddrp)
- * caller allocated data extent at *xaddrp;
- * else
- * allocate data extent and return its xaddr;
- * flag -
+ * tid - transaction id;
+ * ip - file object;
+ * xflag - extent flag (XAD_NOTRECORDED):
+ * xoff - extent offset;
+ * xlen - extent length;
+ * xaddrp - extent address pointer (in/out):
+ * if (*xaddrp)
+ * caller allocated data extent at *xaddrp;
+ * else
+ * allocate data extent and return its xaddr;
+ * flag -
*
* return:
*/
@@ -813,7 +813,7 @@ int xtInsert(tid_t tid, /* transaction id */
jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
/*
- * search for the entry location at which to insert:
+ * search for the entry location at which to insert:
*
* xtFastSearch() and xtSearch() both returns (leaf page
* pinned, index at which to insert).
@@ -853,13 +853,13 @@ int xtInsert(tid_t tid, /* transaction id */
}
/*
- * insert entry for new extent
+ * insert entry for new extent
*/
xflag |= XAD_NEW;
/*
- * if the leaf page is full, split the page and
- * propagate up the router entry for the new page from split
+ * if the leaf page is full, split the page and
+ * propagate up the router entry for the new page from split
*
* The xtSplitUp() will insert the entry and unpin the leaf page.
*/
@@ -886,7 +886,7 @@ int xtInsert(tid_t tid, /* transaction id */
}
/*
- * insert the new entry into the leaf page
+ * insert the new entry into the leaf page
*/
/*
* acquire a transaction lock on the leaf page;
@@ -930,16 +930,16 @@ int xtInsert(tid_t tid, /* transaction id */
/*
- * xtSplitUp()
+ * xtSplitUp()
*
* function:
- * split full pages as propagating insertion up the tree
+ * split full pages as propagating insertion up the tree
*
* parameter:
- * tid - transaction id;
- * ip - file object;
- * split - entry parameter descriptor;
- * btstack - traverse stack from xtSearch()
+ * tid - transaction id;
+ * ip - file object;
+ * split - entry parameter descriptor;
+ * btstack - traverse stack from xtSearch()
*
* return:
*/
@@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid,
/*
- * xtSplitPage()
+ * xtSplitPage()
*
* function:
- * split a full non-root page into
- * original/split/left page and new right page
- * i.e., the original/split page remains as left page.
+ * split a full non-root page into
+ * original/split/left page and new right page
+ * i.e., the original/split page remains as left page.
*
* parameter:
- * int tid,
- * struct inode *ip,
- * struct xtsplit *split,
- * struct metapage **rmpp,
- * u64 *rbnp,
+ * int tid,
+ * struct inode *ip,
+ * struct xtsplit *split,
+ * struct metapage **rmpp,
+ * u64 *rbnp,
*
* return:
- * Pointer to page in which to insert or NULL on error.
+ * Pointer to page in which to insert or NULL on error.
*/
static int
xtSplitPage(tid_t tid, struct inode *ip,
@@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
rbn = addressPXD(pxd);
/* Allocate blocks to quota. */
- if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
- rc = -EDQUOT;
- goto clean_up;
+ if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
+ rc = -EDQUOT;
+ goto clean_up;
}
quota_allocation += lengthPXD(pxd);
@@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
skip = split->index;
/*
- * sequential append at tail (after last entry of last page)
+ * sequential append at tail (after last entry of last page)
*
* if splitting the last page on a level because of appending
* a entry to it (skip is maxentry), it's likely that the access is
@@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
}
/*
- * non-sequential insert (at possibly middle page)
+ * non-sequential insert (at possibly middle page)
*/
/*
@@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip,
/*
- * xtSplitRoot()
+ * xtSplitRoot()
*
* function:
- * split the full root page into
- * original/root/split page and new right page
- * i.e., root remains fixed in tree anchor (inode) and
- * the root is copied to a single new right child page
- * since root page << non-root page, and
- * the split root page contains a single entry for the
- * new right child page.
+ * split the full root page into original/root/split page and new
+ * right page
+ * i.e., root remains fixed in tree anchor (inode) and the root is
+ * copied to a single new right child page since root page <<
+ * non-root page, and the split root page contains a single entry
+ * for the new right child page.
*
* parameter:
- * int tid,
- * struct inode *ip,
- * struct xtsplit *split,
- * struct metapage **rmpp)
+ * int tid,
+ * struct inode *ip,
+ * struct xtsplit *split,
+ * struct metapage **rmpp)
*
* return:
- * Pointer to page in which to insert or NULL on error.
+ * Pointer to page in which to insert or NULL on error.
*/
static int
xtSplitRoot(tid_t tid,
@@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid,
INCREMENT(xtStat.split);
/*
- * allocate a single (right) child page
+ * allocate a single (right) child page
*/
pxdlist = split->pxdlist;
pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid,
}
/*
- * reset the root
+ * reset the root
*
* init root with the single entry for the new right page
* set the 1st entry offset to 0, which force the left-most key
@@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid,
/*
- * xtExtend()
+ * xtExtend()
*
* function: extend in-place;
*
@@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid, /* transaction id */
goto extendOld;
/*
- * extent overflow: insert entry for new extent
+ * extent overflow: insert entry for new extent
*/
//insertNew:
xoff = offsetXAD(xad) + MAXXLEN;
@@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid, /* transaction id */
nextindex = le16_to_cpu(p->header.nextindex);
/*
- * if the leaf page is full, insert the new entry and
- * propagate up the router entry for the new page from split
+ * if the leaf page is full, insert the new entry and
+ * propagate up the router entry for the new page from split
*
* The xtSplitUp() will insert the entry and unpin the leaf page.
*/
@@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid, /* transaction id */
}
}
/*
- * insert the new entry into the leaf page
+ * insert the new entry into the leaf page
*/
else {
/* insert the new entry: mark the entry NEW */
@@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid, /* transaction id */
#ifdef _NOTYET
/*
- * xtTailgate()
+ * xtTailgate()
*
* function: split existing 'tail' extent
- * (split offset >= start offset of tail extent), and
- * relocate and extend the split tail half;
+ * (split offset >= start offset of tail extent), and
+ * relocate and extend the split tail half;
*
* note: existing extent may or may not have been committed.
* caller is responsible for pager buffer cache update, and
@@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid, /* transaction id */
/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
- (ulong)xoff, xlen, (ulong)xaddr);
+ (ulong)xoff, xlen, (ulong)xaddr);
*/
/* there must exist extent to be tailgated */
@@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
- (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
+ (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
if ((llen = xoff - offsetXAD(xad)) == 0)
goto updateOld;
/*
- * partially replace extent: insert entry for new extent
+ * partially replace extent: insert entry for new extent
*/
//insertNew:
/*
- * if the leaf page is full, insert the new entry and
- * propagate up the router entry for the new page from split
+ * if the leaf page is full, insert the new entry and
+ * propagate up the router entry for the new page from split
*
* The xtSplitUp() will insert the entry and unpin the leaf page.
*/
@@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
}
}
/*
- * insert the new entry into the leaf page
+ * insert the new entry into the leaf page
*/
else {
/* insert the new entry: mark the entry NEW */
@@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
#endif /* _NOTYET */
/*
- * xtUpdate()
+ * xtUpdate()
*
* function: update XAD;
*
- * update extent for allocated_but_not_recorded or
- * compressed extent;
+ * update extent for allocated_but_not_recorded or
+ * compressed extent;
*
* parameter:
- * nxad - new XAD;
- * logical extent of the specified XAD must be completely
- * contained by an existing XAD;
+ * nxad - new XAD;
+ * logical extent of the specified XAD must be completely
+ * contained by an existing XAD;
*/
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
{ /* new XAD */
@@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
/*
- * xtAppend()
+ * xtAppend()
*
* function: grow in append mode from contiguous region specified ;
*
* parameter:
- * tid - transaction id;
- * ip - file object;
- * xflag - extent flag:
- * xoff - extent offset;
- * maxblocks - max extent length;
- * xlen - extent length (in/out);
- * xaddrp - extent address pointer (in/out):
- * flag -
+ * tid - transaction id;
+ * ip - file object;
+ * xflag - extent flag:
+ * xoff - extent offset;
+ * maxblocks - max extent length;
+ * xlen - extent length (in/out);
+ * xaddrp - extent address pointer (in/out):
+ * flag -
*
* return:
*/
@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid, /* transaction id */
(ulong) xoff, maxblocks, xlen, (ulong) xaddr);
/*
- * search for the entry location at which to insert:
+ * search for the entry location at which to insert:
*
* xtFastSearch() and xtSearch() both returns (leaf page
* pinned, index at which to insert).
@@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid, /* transaction id */
xlen = min(xlen, (int)(next - xoff));
//insert:
/*
- * insert entry for new extent
+ * insert entry for new extent
*/
xflag |= XAD_NEW;
/*
- * if the leaf page is full, split the page and
- * propagate up the router entry for the new page from split
+ * if the leaf page is full, split the page and
+ * propagate up the router entry for the new page from split
*
* The xtSplitUp() will insert the entry and unpin the leaf page.
*/
@@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid, /* transaction id */
return 0;
/*
- * insert the new entry into the leaf page
+ * insert the new entry into the leaf page
*/
insertLeaf:
/*
@@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid, /* transaction id */
/* - TBD for defragmentaion/reorganization -
*
- * xtDelete()
+ * xtDelete()
*
* function:
- * delete the entry with the specified key.
+ * delete the entry with the specified key.
*
- * N.B.: whole extent of the entry is assumed to be deleted.
+ * N.B.: whole extent of the entry is assumed to be deleted.
*
* parameter:
*
* return:
- * ENOENT: if the entry is not found.
+ * ENOENT: if the entry is not found.
*
* exception:
*/
@@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
/* - TBD for defragmentaion/reorganization -
*
- * xtDeleteUp()
+ * xtDeleteUp()
*
* function:
- * free empty pages as propagating deletion up the tree
+ * free empty pages as propagating deletion up the tree
*
* parameter:
*
@@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip,
/*
- * NAME: xtRelocate()
+ * NAME: xtRelocate()
*
- * FUNCTION: relocate xtpage or data extent of regular file;
- * This function is mainly used by defragfs utility.
+ * FUNCTION: relocate xtpage or data extent of regular file;
+ * This function is mainly used by defragfs utility.
*
- * NOTE: This routine does not have the logic to handle
- * uncommitted allocated extent. The caller should call
- * txCommit() to commit all the allocation before call
- * this routine.
+ * NOTE: This routine does not have the logic to handle
+ * uncommitted allocated extent. The caller should call
+ * txCommit() to commit all the allocation before call
+ * this routine.
*/
int
xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
@@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
/*
- * 1. get and validate the parent xtpage/xad entry
- * covering the source extent to be relocated;
+ * 1. get and validate the parent xtpage/xad entry
+ * covering the source extent to be relocated;
*/
if (xtype == DATAEXT) {
/* search in leaf entry */
@@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
jfs_info("xtRelocate: parent xad entry validated.");
/*
- * 2. relocate the extent
+ * 2. relocate the extent
*/
if (xtype == DATAEXT) {
/* if the extent is allocated-but-not-recorded
@@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
XT_PUTPAGE(pmp);
/*
- * cmRelocate()
+ * cmRelocate()
*
* copy target data pages to be relocated;
*
@@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
pno = offset >> CM_L2BSIZE;
npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
/*
- npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
- (offset >> CM_L2BSIZE) + 1;
+ npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
+ (offset >> CM_L2BSIZE) + 1;
*/
sxaddr = oxaddr;
dxaddr = nxaddr;
@@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
jfs_info("xtRelocate: target data extent relocated.");
- } else { /* (xtype == XTPAGE) */
+ } else { /* (xtype == XTPAGE) */
/*
* read in the target xtpage from the source extent;
@@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
*/
if (lmp) {
BT_MARK_DIRTY(lmp, ip);
- tlck =
- txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
+ tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
lp->header.next = cpu_to_le64(nxaddr);
XT_PUTPAGE(lmp);
}
if (rmp) {
BT_MARK_DIRTY(rmp, ip);
- tlck =
- txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
+ tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
rp->header.prev = cpu_to_le64(nxaddr);
XT_PUTPAGE(rmp);
}
@@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
* scan may be skipped by commit() and logredo();
*/
BT_MARK_DIRTY(mp, ip);
- /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
+ /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
xtlck = (struct xtlock *) & tlck->lock;
@@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
}
/*
- * 3. acquire maplock for the source extent to be freed;
+ * 3. acquire maplock for the source extent to be freed;
*
* acquire a maplock saving the src relocated extent address;
* to free of the extent at commit time;
@@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
* is no buffer associated with this lock since the buffer
* has been redirected to the target location.
*/
- else /* (xtype == XTPAGE) */
+ else /* (xtype == XTPAGE) */
tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
pxdlock = (struct pxd_lock *) & tlck->lock;
@@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
pxdlock->index = 1;
/*
- * 4. update the parent xad entry for relocation;
+ * 4. update the parent xad entry for relocation;
*
* acquire tlck for the parent entry with XAD_NEW as entry
* update which will write LOG_REDOPAGE and update bmap for
@@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
/*
- * xtSearchNode()
+ * xtSearchNode()
*
- * function: search for the internal xad entry covering specified extent.
- * This function is mainly used by defragfs utility.
+ * function: search for the internal xad entry covering specified extent.
+ * This function is mainly used by defragfs utility.
*
* parameters:
- * ip - file object;
- * xad - extent to find;
- * cmpp - comparison result:
- * btstack - traverse stack;
- * flag - search process flag;
+ * ip - file object;
+ * xad - extent to find;
+ * cmpp - comparison result:
+ * btstack - traverse stack;
+ * flag - search process flag;
*
* returns:
- * btstack contains (bn, index) of search path traversed to the entry.
- * *cmpp is set to result of comparison with the entry returned.
- * the page containing the entry is pinned at exit.
+ * btstack contains (bn, index) of search path traversed to the entry.
+ * *cmpp is set to result of comparison with the entry returned.
+ * the page containing the entry is pinned at exit.
*/
static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
int *cmpp, struct btstack * btstack, int flag)
@@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
xaddr = addressXAD(xad);
/*
- * search down tree from root:
+ * search down tree from root:
*
* between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
* internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
XT_CMP(cmp, xoff, &p->xad[index], t64);
if (cmp == 0) {
/*
- * search hit
+ * search hit
*
* verify for exact match;
*/
@@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
}
/*
- * search miss - non-leaf page:
+ * search miss - non-leaf page:
*
* base is the smallest index with key (Kj) greater than
* search key (K) and may be zero or maxentry index.
@@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
/*
- * xtRelink()
+ * xtRelink()
*
* function:
- * link around a freed page.
+ * link around a freed page.
*
* Parameter:
- * int tid,
- * struct inode *ip,
- * xtpage_t *p)
+ * int tid,
+ * struct inode *ip,
+ * xtpage_t *p)
*
* returns:
*/
@@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
/*
- * xtInitRoot()
+ * xtInitRoot()
*
* initialize file root (inline in inode)
*/
@@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip)
#define MAX_TRUNCATE_LEAVES 50
/*
- * xtTruncate()
+ * xtTruncate()
*
* function:
- * traverse for truncation logging backward bottom up;
- * terminate at the last extent entry at the current subtree
- * root page covering new down size.
- * truncation may occur within the last extent entry.
+ * traverse for truncation logging backward bottom up;
+ * terminate at the last extent entry at the current subtree
+ * root page covering new down size.
+ * truncation may occur within the last extent entry.
*
* parameter:
- * int tid,
- * struct inode *ip,
- * s64 newsize,
- * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
+ * int tid,
+ * struct inode *ip,
+ * s64 newsize,
+ * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
*
* return:
*
* note:
- * PWMAP:
- * 1. truncate (non-COMMIT_NOLINK file)
- * by jfs_truncate() or jfs_open(O_TRUNC):
- * xtree is updated;
+ * PWMAP:
+ * 1. truncate (non-COMMIT_NOLINK file)
+ * by jfs_truncate() or jfs_open(O_TRUNC):
+ * xtree is updated;
* 2. truncate index table of directory when last entry removed
- * map update via tlock at commit time;
- * PMAP:
+ * map update via tlock at commit time;
+ * PMAP:
* Call xtTruncate_pmap instead
- * WMAP:
- * 1. remove (free zero link count) on last reference release
- * (pmap has been freed at commit zero link count);
- * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
- * xtree is updated;
- * map update directly at truncation time;
+ * WMAP:
+ * 1. remove (free zero link count) on last reference release
+ * (pmap has been freed at commit zero link count);
+ * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
+ * xtree is updated;
+ * map update directly at truncation time;
*
- * if (DELETE)
- * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
- * else if (TRUNCATE)
- * must write LOG_NOREDOPAGE for deleted index page;
+ * if (DELETE)
+ * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
+ * else if (TRUNCATE)
+ * must write LOG_NOREDOPAGE for deleted index page;
*
* pages may already have been tlocked by anonymous transactions
* during file growth (i.e., write) before truncation;
@@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
* retained in the new sized file.
* if type is PMAP, the data and index pages are NOT
* freed, and the data and index blocks are NOT freed
- * from working map.
+ * from working map.
* (this will allow continued access of data/index of
* temporary file (zerolink count file truncated to zero-length)).
*/
@@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
goto getChild;
/*
- * leaf page
+ * leaf page
*/
freed = 0;
@@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
}
/*
- * internal page: go down to child page of current entry
+ * internal page: go down to child page of current entry
*/
getChild:
/* save current parent entry for the child page */
@@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
/*
- * xtTruncate_pmap()
+ * xtTruncate_pmap()
*
* function:
* Perform truncate to zero lenghth for deleted file, leaving the
@@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
* is committed to disk.
*
* parameter:
- * tid_t tid,
- * struct inode *ip,
- * s64 committed_size)
+ * tid_t tid,
+ * struct inode *ip,
+ * s64 committed_size)
*
* return: new committed size
*
@@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
}
/*
- * leaf page
+ * leaf page
*/
if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
@@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
xoff = offsetXAD(xad);
xlen = lengthXAD(xad);
XT_PUTPAGE(mp);
- return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
+ return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
}
tlck = txLock(tid, ip, mp, tlckXTREE);
tlck->type = tlckXTREE | tlckFREE;
@@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
*/
tlck = txLock(tid, ip, mp, tlckXTREE);
xtlck = (struct xtlock *) & tlck->lock;
- xtlck->hwm.offset =
- le16_to_cpu(p->header.nextindex) - 1;
+ xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
tlck->type = tlckXTREE | tlckFREE;
XT_PUTPAGE(mp);
@@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
else
index--;
/*
- * internal page: go down to child page of current entry
+ * internal page: go down to child page of current entry
*/
getChild:
/* save current parent entry for the child page */
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 164f6f2b101..70815c8a3d6 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -19,14 +19,14 @@
#define _H_JFS_XTREE
/*
- * jfs_xtree.h: extent allocation descriptor B+-tree manager
+ * jfs_xtree.h: extent allocation descriptor B+-tree manager
*/
#include "jfs_btree.h"
/*
- * extent allocation descriptor (xad)
+ * extent allocation descriptor (xad)
*/
typedef struct xad {
unsigned flag:8; /* 1: flag */
@@ -38,30 +38,30 @@ typedef struct xad {
__le32 addr2; /* 4: address in unit of fsblksize */
} xad_t; /* (16) */
-#define MAXXLEN ((1 << 24) - 1)
+#define MAXXLEN ((1 << 24) - 1)
-#define XTSLOTSIZE 16
-#define L2XTSLOTSIZE 4
+#define XTSLOTSIZE 16
+#define L2XTSLOTSIZE 4
/* xad_t field construction */
#define XADoffset(xad, offset64)\
{\
- (xad)->off1 = ((u64)offset64) >> 32;\
- (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
+ (xad)->off1 = ((u64)offset64) >> 32;\
+ (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
}
#define XADaddress(xad, address64)\
{\
- (xad)->addr1 = ((u64)address64) >> 32;\
- (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
+ (xad)->addr1 = ((u64)address64) >> 32;\
+ (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
}
-#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32)
+#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32)
/* xad_t field extraction */
#define offsetXAD(xad)\
- ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
+ ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
#define addressXAD(xad)\
- ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
-#define lengthXAD(xad) __le24_to_cpu((xad)->len)
+ ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
+#define lengthXAD(xad) __le24_to_cpu((xad)->len)
/* xad list */
struct xadlist {
@@ -71,22 +71,22 @@ struct xadlist {
};
/* xad_t flags */
-#define XAD_NEW 0x01 /* new */
-#define XAD_EXTENDED 0x02 /* extended */
-#define XAD_COMPRESSED 0x04 /* compressed with recorded length */
+#define XAD_NEW 0x01 /* new */
+#define XAD_EXTENDED 0x02 /* extended */
+#define XAD_COMPRESSED 0x04 /* compressed with recorded length */
#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */
-#define XAD_COW 0x10 /* copy-on-write */
+#define XAD_COW 0x10 /* copy-on-write */
/* possible values for maxentry */
-#define XTROOTINITSLOT_DIR 6
-#define XTROOTINITSLOT 10
-#define XTROOTMAXSLOT 18
-#define XTPAGEMAXSLOT 256
-#define XTENTRYSTART 2
+#define XTROOTINITSLOT_DIR 6
+#define XTROOTINITSLOT 10
+#define XTROOTMAXSLOT 18
+#define XTPAGEMAXSLOT 256
+#define XTENTRYSTART 2
/*
- * xtree page:
+ * xtree page:
*/
typedef union {
struct xtheader {
@@ -106,7 +106,7 @@ typedef union {
} xtpage_t;
/*
- * external declaration
+ * external declaration
*/
extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
int *pflag, s64 * paddr, int *plen, int flag);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 41c20477126..25161c4121e 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
* dentry - child directory dentry
*
* RETURN: -EINVAL - if name is . or ..
- * -EINVAL - if . or .. exist but are invalid.
+ * -EINVAL - if . or .. exist but are invalid.
* errors from subroutines
*
* note:
@@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
inode_dec_link_count(ip);
/*
- * commit zero link count object
+ * commit zero link count object
*/
if (ip->i_nlink == 0) {
assert(!test_cflag(COMMIT_Nolink, ip));
@@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
/*
* NAME: commitZeroLink()
*
- * FUNCTION: for non-directory, called by jfs_remove(),
+ * FUNCTION: for non-directory, called by jfs_remove(),
* truncate a regular file, directory or symbolic
* link to zero length. return 0 if type is not
* one of these.
@@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
/*
* NAME: jfs_free_zero_link()
*
- * FUNCTION: for non-directory, called by iClose(),
+ * FUNCTION: for non-directory, called by iClose(),
* free resources of a file from cache and WORKING map
* for a file previously committed with zero link count
* while associated with a pager object,
@@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry,
* NAME: jfs_symlink(dip, dentry, name)
*
* FUNCTION: creates a symbolic link to <symlink> by name <name>
- * in directory <dip>
+ * in directory <dip>
*
- * PARAMETER: dip - parent directory vnode
- * dentry - dentry of symbolic link
- * name - the path name of the existing object
- * that will be the source of the link
+ * PARAMETER: dip - parent directory vnode
+ * dentry - dentry of symbolic link
+ * name - the path name of the existing object
+ * that will be the source of the link
*
* RETURN: errors from subroutines
*
@@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
/*
- * NAME: jfs_rename
+ * NAME: jfs_rename
*
- * FUNCTION: rename a file or directory
+ * FUNCTION: rename a file or directory
*/
static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
@@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
- * NAME: jfs_mknod
+ * NAME: jfs_mknod
*
- * FUNCTION: Create a special file (device)
+ * FUNCTION: Create a special file (device)
*/
static int jfs_mknod(struct inode *dir, struct dentry *dentry,
int mode, dev_t rdev)
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 79d625f3f73..71984ee9534 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -29,17 +29,17 @@
#include "jfs_txnmgr.h"
#include "jfs_debug.h"
-#define BITSPERPAGE (PSIZE << 3)
-#define L2MEGABYTE 20
-#define MEGABYTE (1 << L2MEGABYTE)
-#define MEGABYTE32 (MEGABYTE << 5)
+#define BITSPERPAGE (PSIZE << 3)
+#define L2MEGABYTE 20
+#define MEGABYTE (1 << L2MEGABYTE)
+#define MEGABYTE32 (MEGABYTE << 5)
/* convert block number to bmap file page number */
#define BLKTODMAPN(b)\
- (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
+ (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
/*
- * jfs_extendfs()
+ * jfs_extendfs()
*
* function: extend file system;
*
@@ -48,9 +48,9 @@
* workspace space
*
* input:
- * new LVSize: in LV blocks (required)
- * new LogSize: in LV blocks (optional)
- * new FSSize: in LV blocks (optional)
+ * new LVSize: in LV blocks (required)
+ * new LogSize: in LV blocks (optional)
+ * new FSSize: in LV blocks (optional)
*
* new configuration:
* 1. set new LogSize as specified or default from new LVSize;
@@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
}
/*
- * reconfigure LV spaces
- * ---------------------
+ * reconfigure LV spaces
+ * ---------------------
*
* validate new size, or, if not specified, determine new size
*/
@@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
log_formatted = 1;
}
/*
- * quiesce file system
+ * quiesce file system
*
* (prepare to move the inline log and to prevent map update)
*
@@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
}
/*
- * extend block allocation map
- * ---------------------------
+ * extend block allocation map
+ * ---------------------------
*
* extendfs() for new extension, retry after crash recovery;
*
@@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
* s_size: aggregate size in physical blocks;
*/
/*
- * compute the new block allocation map configuration
+ * compute the new block allocation map configuration
*
* map dinode:
* di_size: map file size in byte;
@@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
newNpages = BLKTODMAPN(t64) + 1;
/*
- * extend map from current map (WITHOUT growing mapfile)
+ * extend map from current map (WITHOUT growing mapfile)
*
* map new extension with unmapped part of the last partial
* dmap page, if applicable, and extra page(s) allocated
@@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
XSize -= nblocks;
/*
- * grow map file to cover remaining extension
- * and/or one extra dmap page for next extendfs();
+ * grow map file to cover remaining extension
+ * and/or one extra dmap page for next extendfs();
*
* allocate new map pages and its backing blocks, and
* update map file xtree
@@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
dbFinalizeBmap(ipbmap);
/*
- * update inode allocation map
- * ---------------------------
+ * update inode allocation map
+ * ---------------------------
*
* move iag lists from old to new iag;
* agstart field is not updated for logredo() to reconstruct
@@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
}
/*
- * finalize
- * --------
+ * finalize
+ * --------
*
* extension is committed when on-disk super block is
* updated with new descriptors: logredo will recover
@@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
diFreeSpecial(ipbmap2);
/*
- * update superblock
+ * update superblock
*/
if ((rc = readSuper(sb, &bh)))
goto error_out;
@@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
resume:
/*
- * resume file system transactions
+ * resume file system transactions
*/
txResume(sb);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index b753ba21645..b2375f0774b 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -63,9 +63,9 @@
*
* On-disk:
*
- * FEALISTs are stored on disk using blocks allocated by dbAlloc() and
- * written directly. An EA list may be in-lined in the inode if there is
- * sufficient room available.
+ * FEALISTs are stored on disk using blocks allocated by dbAlloc() and
+ * written directly. An EA list may be in-lined in the inode if there is
+ * sufficient room available.
*/
struct ea_buffer {
@@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
size_check:
if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
printk(KERN_ERR "ea_get: invalid extended attribute\n");
- dump_mem("xattr", ea_buf->xattr, ea_size);
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
+ ea_buf->xattr, ea_size, 1);
ea_release(inode, ea_buf);
rc = -EIO;
goto clean_up;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 74f30e0c038..98e78e2f18d 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
rcu_read_lock();
buffer += sprintf(buffer,
"State:\t%s\n"
- "SleepAVG:\t%lu%%\n"
"Tgid:\t%d\n"
"Pid:\t%d\n"
"PPid:\t%d\n"
@@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
- (p->sleep_avg/1024)*100/(1020000000/1024),
p->tgid, p->pid,
pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
@@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *task, char * buffer)
return buffer - orig;
}
+static clock_t task_utime(struct task_struct *p)
+{
+ clock_t utime = cputime_to_clock_t(p->utime),
+ total = utime + cputime_to_clock_t(p->stime);
+ u64 temp;
+
+ /*
+ * Use CFS's precise accounting:
+ */
+ temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+ if (total) {
+ temp *= utime;
+ do_div(temp, total);
+ }
+ utime = (clock_t)temp;
+
+ return utime;
+}
+
+static clock_t task_stime(struct task_struct *p)
+{
+ clock_t stime = cputime_to_clock_t(p->stime);
+
+ /*
+ * Use CFS's precise accounting. (we subtract utime from
+ * the total, to make sure the total observed by userspace
+ * grows monotonically - apps rely on that):
+ */
+ stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
+
+ return stime;
+}
+
+
static int do_task_stat(struct task_struct *task, char * buffer, int whole)
{
unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -326,7 +359,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
unsigned long long start_time;
unsigned long cmin_flt = 0, cmaj_flt = 0;
unsigned long min_flt = 0, maj_flt = 0;
- cputime_t cutime, cstime, utime, stime;
+ cputime_t cutime, cstime;
+ clock_t utime, stime;
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
@@ -344,7 +378,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = utime = stime = cputime_zero;
+ cutime = cstime = cputime_zero;
+ utime = stime = 0;
rcu_read_lock();
if (lock_task_sighand(task, &flags)) {
@@ -370,15 +405,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
do {
min_flt += t->min_flt;
maj_flt += t->maj_flt;
- utime = cputime_add(utime, t->utime);
- stime = cputime_add(stime, t->stime);
+ utime += task_utime(t);
+ stime += task_stime(t);
t = next_thread(t);
} while (t != task);
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
- utime = cputime_add(utime, sig->utime);
- stime = cputime_add(stime, sig->stime);
+ utime += cputime_to_clock_t(sig->utime);
+ stime += cputime_to_clock_t(sig->stime);
}
sid = signal_session(sig);
@@ -394,8 +429,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
if (!whole) {
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- utime = task->utime;
- stime = task->stime;
+ utime = task_utime(task);
+ stime = task_stime(task);
}
/* scale priority and nice values from timeslices to -20..20 */
@@ -426,8 +461,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
cmin_flt,
maj_flt,
cmaj_flt,
- cputime_to_clock_t(utime),
- cputime_to_clock_t(stime),
+ utime,
+ stime,
cputime_to_clock_t(cutime),
cputime_to_clock_t(cstime),
priority,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a5fa1fdafc4..46ea5d56e1b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -296,7 +296,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
*/
static int proc_pid_schedstat(struct task_struct *task, char *buffer)
{
- return sprintf(buffer, "%lu %lu %lu\n",
+ return sprintf(buffer, "%llu %llu %lu\n",
task->sched_info.cpu_time,
task->sched_info.run_delay,
task->sched_info.pcnt);
@@ -929,6 +929,69 @@ static const struct file_operations proc_fault_inject_operations = {
};
#endif
+#ifdef CONFIG_SCHED_DEBUG
+/*
+ * Print out various scheduling related per-task fields:
+ */
+static int sched_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+
+ WARN_ON(!inode);
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+ proc_sched_show_task(p, m);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+sched_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct task_struct *p;
+
+ WARN_ON(!inode);
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+ proc_sched_set_task(p);
+
+ put_task_struct(p);
+
+ return count;
+}
+
+static int sched_open(struct inode *inode, struct file *filp)
+{
+ int ret;
+
+ ret = single_open(filp, sched_show, NULL);
+ if (!ret) {
+ struct seq_file *m = filp->private_data;
+
+ m->private = inode;
+ }
+ return ret;
+}
+
+static const struct file_operations proc_pid_sched_operations = {
+ .open = sched_open,
+ .read = seq_read,
+ .write = sched_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+#endif
+
static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
@@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_stuff[] = {
INF("environ", S_IRUSR, pid_environ),
INF("auxv", S_IRUSR, pid_auxv),
INF("status", S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+ REG("sched", S_IRUGO|S_IWUSR, pid_sched),
+#endif
INF("cmdline", S_IRUGO, pid_cmdline),
INF("stat", S_IRUGO, tgid_stat),
INF("statm", S_IRUGO, pid_statm),
@@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_stuff[] = {
INF("environ", S_IRUSR, pid_environ),
INF("auxv", S_IRUSR, pid_auxv),
INF("status", S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+ REG("sched", S_IRUGO|S_IWUSR, pid_sched),
+#endif
INF("cmdline", S_IRUGO, pid_cmdline),
INF("stat", S_IRUGO, tid_stat),
INF("statm", S_IRUGO, pid_statm),
diff --git a/include/asm-generic/bitops/sched.h b/include/asm-generic/bitops/sched.h
index 815bb014806..604fab7031a 100644
--- a/include/asm-generic/bitops/sched.h
+++ b/include/asm-generic/bitops/sched.h
@@ -6,28 +6,23 @@
/*
* Every architecture must define this function. It's the fastest
- * way of searching a 140-bit bitmap where the first 100 bits are
- * unlikely to be set. It's guaranteed that at least one of the 140
- * bits is cleared.
+ * way of searching a 100-bit bitmap. It's guaranteed that at least
+ * one of the 100 bits is cleared.
*/
static inline int sched_find_first_bit(const unsigned long *b)
{
#if BITS_PER_LONG == 64
- if (unlikely(b[0]))
+ if (b[0])
return __ffs(b[0]);
- if (likely(b[1]))
- return __ffs(b[1]) + 64;
- return __ffs(b[2]) + 128;
+ return __ffs(b[1]) + 64;
#elif BITS_PER_LONG == 32
- if (unlikely(b[0]))
+ if (b[0])
return __ffs(b[0]);
- if (unlikely(b[1]))
+ if (b[1])
return __ffs(b[1]) + 32;
- if (unlikely(b[2]))
+ if (b[2])
return __ffs(b[2]) + 64;
- if (b[3])
- return __ffs(b[3]) + 96;
- return __ffs(b[4]) + 128;
+ return __ffs(b[3]) + 96;
#else
#error BITS_PER_LONG not defined
#endif
diff --git a/include/asm-mips/mach-au1x00/au1xxx_ide.h b/include/asm-mips/mach-au1x00/au1xxx_ide.h
index 8fcae21adbd..4663e8b415c 100644
--- a/include/asm-mips/mach-au1x00/au1xxx_ide.h
+++ b/include/asm-mips/mach-au1x00/au1xxx_ide.h
@@ -88,26 +88,26 @@ static const struct drive_list_entry dma_white_list [] = {
/*
* Hitachi
*/
- { "HITACHI_DK14FA-20" , "ALL" },
- { "HTS726060M9AT00" , "ALL" },
+ { "HITACHI_DK14FA-20" , NULL },
+ { "HTS726060M9AT00" , NULL },
/*
* Maxtor
*/
- { "Maxtor 6E040L0" , "ALL" },
- { "Maxtor 6Y080P0" , "ALL" },
- { "Maxtor 6Y160P0" , "ALL" },
+ { "Maxtor 6E040L0" , NULL },
+ { "Maxtor 6Y080P0" , NULL },
+ { "Maxtor 6Y160P0" , NULL },
/*
* Seagate
*/
- { "ST3120026A" , "ALL" },
- { "ST320014A" , "ALL" },
- { "ST94011A" , "ALL" },
- { "ST340016A" , "ALL" },
+ { "ST3120026A" , NULL },
+ { "ST320014A" , NULL },
+ { "ST94011A" , NULL },
+ { "ST340016A" , NULL },
/*
* Western Digital
*/
- { "WDC WD400UE-00HCT0" , "ALL" },
- { "WDC WD400JB-00JJC0" , "ALL" },
+ { "WDC WD400UE-00HCT0" , NULL },
+ { "WDC WD400JB-00JJC0" , NULL },
{ NULL , NULL }
};
@@ -116,9 +116,9 @@ static const struct drive_list_entry dma_black_list [] = {
/*
* Western Digital
*/
- { "WDC WD100EB-00CGH0" , "ALL" },
- { "WDC WD200BB-00AUA1" , "ALL" },
- { "WDC AC24300L" , "ALL" },
+ { "WDC WD100EB-00CGH0" , NULL },
+ { "WDC WD200BB-00AUA1" , NULL },
+ { "WDC AC24300L" , NULL },
{ NULL , NULL }
};
#endif
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 7803014f3a1..8d302298a16 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -79,6 +79,19 @@
#endif
#ifdef CONFIG_PREEMPT
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
+
+/*
+ * Check whether we were atomic before we did preempt_disable():
+ * (used by the scheduler)
+ */
+#define in_atomic_preempt_off() \
+ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+
+#ifdef CONFIG_PREEMPT
# define preemptible() (preempt_count() == 0 && !irqs_disabled())
# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
#else
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1e365acdd36..19ab2580405 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -25,6 +25,7 @@
#include <asm/system.h>
#include <asm/io.h>
#include <asm/semaphore.h>
+#include <asm/mutex.h>
/******************************************************************************
* IDE driver configuration options (play with these as desired):
@@ -685,6 +686,8 @@ typedef struct hwif_s {
u8 mwdma_mask;
u8 swdma_mask;
+ u8 cbl; /* cable type */
+
hwif_chipset_t chipset; /* sub-module for tuning.. */
struct pci_dev *pci_dev; /* for pci chipsets */
@@ -735,8 +738,8 @@ typedef struct hwif_s {
void (*ide_dma_clear_irq)(ide_drive_t *drive);
void (*dma_host_on)(ide_drive_t *drive);
void (*dma_host_off)(ide_drive_t *drive);
- int (*ide_dma_lostirq)(ide_drive_t *drive);
- int (*ide_dma_timeout)(ide_drive_t *drive);
+ void (*dma_lost_irq)(ide_drive_t *drive);
+ void (*dma_timeout)(ide_drive_t *drive);
void (*OUTB)(u8 addr, unsigned long port);
void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port);
@@ -791,7 +794,6 @@ typedef struct hwif_s {
unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */
unsigned reset : 1; /* reset after probe */
unsigned autodma : 1; /* auto-attempt using DMA at boot */
- unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */
unsigned no_lba48 : 1; /* 1 = cannot do LBA48 */
unsigned no_lba48_dma : 1; /* 1 = cannot do LBA48 DMA */
unsigned auto_poll : 1; /* supports nop auto-poll */
@@ -863,7 +865,7 @@ typedef struct hwgroup_s {
typedef struct ide_driver_s ide_driver_t;
-extern struct semaphore ide_setting_sem;
+extern struct mutex ide_setting_mtx;
int set_io_32bit(ide_drive_t *, int);
int set_pio_mode(ide_drive_t *, int);
@@ -1304,8 +1306,8 @@ extern int __ide_dma_check(ide_drive_t *);
extern int ide_dma_setup(ide_drive_t *);
extern void ide_dma_start(ide_drive_t *);
extern int __ide_dma_end(ide_drive_t *);
-extern int __ide_dma_lostirq(ide_drive_t *);
-extern int __ide_dma_timeout(ide_drive_t *);
+extern void ide_dma_lost_irq(ide_drive_t *);
+extern void ide_dma_timeout(ide_drive_t *);
#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
#else
@@ -1382,11 +1384,11 @@ extern const ide_pio_timings_t ide_pio_timings[6];
extern spinlock_t ide_lock;
-extern struct semaphore ide_cfg_sem;
+extern struct mutex ide_cfg_mtx;
/*
* Structure locking:
*
- * ide_cfg_sem and ide_lock together protect changes to
+ * ide_cfg_mtx and ide_lock together protect changes to
* ide_hwif_t->{next,hwgroup}
* ide_drive_t->next
*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 693f0e6c54d..cfb680585ab 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,8 @@
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
+/* SCHED_ISO: reserved but not implemented yet */
+#define SCHED_IDLE 5
#ifdef __KERNEL__
@@ -130,6 +132,26 @@ extern unsigned long nr_active(void);
extern unsigned long nr_iowait(void);
extern unsigned long weighted_cpuload(const int cpu);
+struct seq_file;
+struct cfs_rq;
+#ifdef CONFIG_SCHED_DEBUG
+extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
+extern void proc_sched_set_task(struct task_struct *p);
+extern void
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now);
+#else
+static inline void
+proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+{
+}
+static inline void proc_sched_set_task(struct task_struct *p)
+{
+}
+static inline void
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
+{
+}
+#endif
/*
* Task state bitmask. NOTE! These bits are also
@@ -193,6 +215,7 @@ struct task_struct;
extern void sched_init(void);
extern void sched_init_smp(void);
extern void init_idle(struct task_struct *idle, int cpu);
+extern void init_idle_bootup_task(struct task_struct *idle);
extern cpumask_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
@@ -479,7 +502,7 @@ struct signal_struct {
* from jiffies_to_ns(utime + stime) if sched_clock uses something
* other than jiffies.)
*/
- unsigned long long sched_time;
+ unsigned long long sum_sched_runtime;
/*
* We don't bother to synchronize most readers of this at all,
@@ -521,31 +544,6 @@ struct signal_struct {
#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */
#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */
-
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space. This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO 100
-#define MAX_RT_PRIO MAX_USER_RT_PRIO
-
-#define MAX_PRIO (MAX_RT_PRIO + 40)
-
-#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO)
-#define rt_task(p) rt_prio((p)->prio)
-#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH))
-#define is_rt_policy(p) ((p) != SCHED_NORMAL && (p) != SCHED_BATCH)
-#define has_rt_policy(p) unlikely(is_rt_policy((p)->policy))
-
/*
* Some day this will be a full-fledged user tracking system..
*/
@@ -583,13 +581,13 @@ struct reclaim_state;
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info {
/* cumulative counters */
- unsigned long cpu_time, /* time spent on the cpu */
- run_delay, /* time spent waiting on a runqueue */
- pcnt; /* # of timeslices run on this cpu */
+ unsigned long pcnt; /* # of times run on this cpu */
+ unsigned long long cpu_time, /* time spent on the cpu */
+ run_delay; /* time spent waiting on a runqueue */
/* timestamps */
- unsigned long last_arrival, /* when we last ran on a cpu */
- last_queued; /* when we were last queued to run */
+ unsigned long long last_arrival,/* when we last ran on a cpu */
+ last_queued; /* when we were last queued to run */
};
#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
@@ -639,18 +637,24 @@ static inline int sched_info_on(void)
#endif
}
-enum idle_type
-{
- SCHED_IDLE,
- NOT_IDLE,
- NEWLY_IDLE,
- MAX_IDLE_TYPES
+enum cpu_idle_type {
+ CPU_IDLE,
+ CPU_NOT_IDLE,
+ CPU_NEWLY_IDLE,
+ CPU_MAX_IDLE_TYPES
};
/*
* sched-domains (multiprocessor balancing) declarations:
*/
-#define SCHED_LOAD_SCALE 128UL /* increase resolution of load */
+
+/*
+ * Increase resolution of nice-level calculations:
+ */
+#define SCHED_LOAD_SHIFT 10
+#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT)
+
+#define SCHED_LOAD_SCALE_FUZZ (SCHED_LOAD_SCALE >> 5)
#ifdef CONFIG_SMP
#define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */
@@ -719,14 +723,14 @@ struct sched_domain {
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
- unsigned long lb_cnt[MAX_IDLE_TYPES];
- unsigned long lb_failed[MAX_IDLE_TYPES];
- unsigned long lb_balanced[MAX_IDLE_TYPES];
- unsigned long lb_imbalance[MAX_IDLE_TYPES];
- unsigned long lb_gained[MAX_IDLE_TYPES];
- unsigned long lb_hot_gained[MAX_IDLE_TYPES];
- unsigned long lb_nobusyg[MAX_IDLE_TYPES];
- unsigned long lb_nobusyq[MAX_IDLE_TYPES];
+ unsigned long lb_cnt[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_failed[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_balanced[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_imbalance[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_gained[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_hot_gained[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_nobusyg[CPU_MAX_IDLE_TYPES];
+ unsigned long lb_nobusyq[CPU_MAX_IDLE_TYPES];
/* Active load balancing */
unsigned long alb_cnt;
@@ -753,12 +757,6 @@ struct sched_domain {
extern int partition_sched_domains(cpumask_t *partition1,
cpumask_t *partition2);
-/*
- * Maximum cache size the migration-costs auto-tuning code will
- * search from:
- */
-extern unsigned int max_cache_size;
-
#endif /* CONFIG_SMP */
@@ -809,14 +807,86 @@ struct mempolicy;
struct pipe_inode_info;
struct uts_namespace;
-enum sleep_type {
- SLEEP_NORMAL,
- SLEEP_NONINTERACTIVE,
- SLEEP_INTERACTIVE,
- SLEEP_INTERRUPTED,
+struct rq;
+struct sched_domain;
+
+struct sched_class {
+ struct sched_class *next;
+
+ void (*enqueue_task) (struct rq *rq, struct task_struct *p,
+ int wakeup, u64 now);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p,
+ int sleep, u64 now);
+ void (*yield_task) (struct rq *rq, struct task_struct *p);
+
+ void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+
+ struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
+ void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
+
+ int (*load_balance) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, unsigned long *total_load_moved);
+
+ void (*set_curr_task) (struct rq *rq);
+ void (*task_tick) (struct rq *rq, struct task_struct *p);
+ void (*task_new) (struct rq *rq, struct task_struct *p);
};
-struct prio_array;
+struct load_weight {
+ unsigned long weight, inv_weight;
+};
+
+/*
+ * CFS stats for a schedulable entity (task, task-group etc)
+ *
+ * Current field usage histogram:
+ *
+ * 4 se->block_start
+ * 4 se->run_node
+ * 4 se->sleep_start
+ * 4 se->sleep_start_fair
+ * 6 se->load.weight
+ * 7 se->delta_fair
+ * 15 se->wait_runtime
+ */
+struct sched_entity {
+ long wait_runtime;
+ unsigned long delta_fair_run;
+ unsigned long delta_fair_sleep;
+ unsigned long delta_exec;
+ s64 fair_key;
+ struct load_weight load; /* for load-balancing */
+ struct rb_node run_node;
+ unsigned int on_rq;
+
+ u64 wait_start_fair;
+ u64 wait_start;
+ u64 exec_start;
+ u64 sleep_start;
+ u64 sleep_start_fair;
+ u64 block_start;
+ u64 sleep_max;
+ u64 block_max;
+ u64 exec_max;
+ u64 wait_max;
+ u64 last_ran;
+
+ u64 sum_exec_runtime;
+ s64 sum_wait_runtime;
+ s64 sum_sleep_runtime;
+ unsigned long wait_runtime_overruns;
+ unsigned long wait_runtime_underruns;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *parent;
+ /* rq on which this entity is (to be) queued: */
+ struct cfs_rq *cfs_rq;
+ /* rq "owned" by this entity/group: */
+ struct cfs_rq *my_q;
+#endif
+};
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
@@ -832,23 +902,20 @@ struct task_struct {
int oncpu;
#endif
#endif
- int load_weight; /* for niceness load balancing purposes */
+
int prio, static_prio, normal_prio;
struct list_head run_list;
- struct prio_array *array;
+ struct sched_class *sched_class;
+ struct sched_entity se;
unsigned short ioprio;
#ifdef CONFIG_BLK_DEV_IO_TRACE
unsigned int btrace_seq;
#endif
- unsigned long sleep_avg;
- unsigned long long timestamp, last_ran;
- unsigned long long sched_time; /* sched_clock time spent running */
- enum sleep_type sleep_type;
unsigned int policy;
cpumask_t cpus_allowed;
- unsigned int time_slice, first_time_slice;
+ unsigned int time_slice;
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
@@ -1078,6 +1145,37 @@ struct task_struct {
#endif
};
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space. This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO 100
+#define MAX_RT_PRIO MAX_USER_RT_PRIO
+
+#define MAX_PRIO (MAX_RT_PRIO + 40)
+#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
+
+static inline int rt_prio(int prio)
+{
+ if (unlikely(prio < MAX_RT_PRIO))
+ return 1;
+ return 0;
+}
+
+static inline int rt_task(struct task_struct *p)
+{
+ return rt_prio(p->prio);
+}
+
static inline pid_t process_group(struct task_struct *tsk)
{
return tsk->signal->pgrp;
@@ -1223,7 +1321,7 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
extern unsigned long long sched_clock(void);
extern unsigned long long
-current_sched_time(const struct task_struct *current_task);
+task_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
@@ -1232,6 +1330,8 @@ extern void sched_exec(void);
#define sched_exec() {}
#endif
+extern void sched_clock_unstable_event(void);
+
#ifdef CONFIG_HOTPLUG_CPU
extern void idle_task_exit(void);
#else
@@ -1240,6 +1340,14 @@ static inline void idle_task_exit(void) {}
extern void sched_idle_next(void);
+extern unsigned int sysctl_sched_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_batch_wakeup_granularity;
+extern unsigned int sysctl_sched_stat_granularity;
+extern unsigned int sysctl_sched_runtime_limit;
+extern unsigned int sysctl_sched_child_runs_first;
+extern unsigned int sysctl_sched_features;
+
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
@@ -1317,8 +1425,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
#else
static inline void kick_process(struct task_struct *tsk) { }
#endif
-extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags));
-extern void FASTCALL(sched_exit(struct task_struct * p));
+extern void sched_fork(struct task_struct *p, int clone_flags);
+extern void sched_dead(struct task_struct *p);
extern int in_group_p(gid_t);
extern int in_egroup_p(gid_t);
@@ -1406,7 +1514,7 @@ extern struct mm_struct * mm_alloc(void);
extern void FASTCALL(__mmdrop(struct mm_struct *));
static inline void mmdrop(struct mm_struct * mm)
{
- if (atomic_dec_and_test(&mm->mm_count))
+ if (unlikely(atomic_dec_and_test(&mm->mm_count)))
__mmdrop(mm);
}
@@ -1638,10 +1746,7 @@ static inline unsigned int task_cpu(const struct task_struct *p)
return task_thread_info(p)->cpu;
}
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
-{
- task_thread_info(p)->cpu = cpu;
-}
+extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
#else
diff --git a/include/linux/topology.h b/include/linux/topology.h
index a9d1f049cc1..da6c39b2d05 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -98,7 +98,7 @@
.cache_nice_tries = 0, \
.busy_idx = 0, \
.idle_idx = 0, \
- .newidle_idx = 1, \
+ .newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
@@ -128,14 +128,15 @@
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.busy_idx = 2, \
- .idle_idx = 1, \
- .newidle_idx = 2, \
+ .idle_idx = 0, \
+ .newidle_idx = 0, \
.wake_idx = 1, \
.forkexec_idx = 1, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_WAKE_AFFINE \
+ | SD_WAKE_IDLE \
| SD_SHARE_PKG_RESOURCES\
| BALANCE_FOR_MC_POWER, \
.last_balance = jiffies, \
@@ -158,14 +159,15 @@
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.busy_idx = 2, \
- .idle_idx = 1, \
- .newidle_idx = 2, \
+ .idle_idx = 0, \
+ .newidle_idx = 0, \
.wake_idx = 1, \
.forkexec_idx = 1, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_WAKE_AFFINE \
+ | SD_WAKE_IDLE \
| BALANCE_FOR_PKG_POWER,\
.last_balance = jiffies, \
.balance_interval = 1, \
diff --git a/include/linux/wait.h b/include/linux/wait.h
index e820d00e138..0e686280450 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -366,15 +366,15 @@ static inline void remove_wait_queue_locked(wait_queue_head_t *q,
/*
* These are the old interfaces to sleep waiting for an event.
- * They are racy. DO NOT use them, use the wait_event* interfaces above.
- * We plan to remove these interfaces during 2.7.
+ * They are racy. DO NOT use them, use the wait_event* interfaces above.
+ * We plan to remove these interfaces.
*/
-extern void FASTCALL(sleep_on(wait_queue_head_t *q));
-extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
- signed long timeout));
-extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q));
-extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q,
- signed long timeout));
+extern void sleep_on(wait_queue_head_t *q);
+extern long sleep_on_timeout(wait_queue_head_t *q,
+ signed long timeout);
+extern void interruptible_sleep_on(wait_queue_head_t *q);
+extern long interruptible_sleep_on_timeout(wait_queue_head_t *q,
+ signed long timeout);
/*
* Waitqueues which are removed from the waitqueue_head at wakeup time
diff --git a/init/main.c b/init/main.c
index eb8bdbae4fc..0eb1c7463fe 100644
--- a/init/main.c
+++ b/init/main.c
@@ -436,15 +436,16 @@ static void noinline __init_refok rest_init(void)
/*
* The boot idle thread must execute schedule()
- * at least one to get things moving:
+ * at least once to get things moving:
*/
+ init_idle_bootup_task(current);
preempt_enable_no_resched();
schedule();
preempt_disable();
/* Call into cpu_idle with preempt disabled */
cpu_idle();
-}
+}
/* Check for early params. */
static int __init do_early_param(char *param, char *val)
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index c0148ae992c..81e69782963 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -99,9 +99,10 @@ void __delayacct_blkio_end(void)
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
s64 tmp;
- struct timespec ts;
- unsigned long t1,t2,t3;
+ unsigned long t1;
+ unsigned long long t2, t3;
unsigned long flags;
+ struct timespec ts;
/* Though tsk->delays accessed later, early exit avoids
* unnecessary returning of other data
@@ -124,11 +125,10 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->cpu_count += t1;
- jiffies_to_timespec(t2, &ts);
- tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+ tmp = (s64)d->cpu_delay_total + t2;
d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
- tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+ tmp = (s64)d->cpu_run_virtual_total + t3;
d->cpu_run_virtual_total =
(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
diff --git a/kernel/exit.c b/kernel/exit.c
index 5c8ecbaa19a..ca6a11b7302 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -122,9 +122,9 @@ static void __exit_signal(struct task_struct *tsk)
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
sig->nivcsw += tsk->nivcsw;
- sig->sched_time += tsk->sched_time;
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
+ sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
@@ -182,7 +182,6 @@ repeat:
zap_leader = (leader->exit_signal == -1);
}
- sched_exit(p);
write_unlock_irq(&tasklist_lock);
proc_flush_task(p);
release_thread(p);
@@ -291,7 +290,7 @@ static void reparent_to_kthreadd(void)
/* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD;
- if (!has_rt_policy(current) && (task_nice(current) < 0))
+ if (task_nice(current) < 0)
set_user_nice(current, 0);
/* cpus_allowed? */
/* rt_priority? */
diff --git a/kernel/fork.c b/kernel/fork.c
index 73ad5cda1bc..da3a155bba0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -877,7 +877,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
- sig->sched_time = 0;
+ sig->sum_sched_runtime = 0;
INIT_LIST_HEAD(&sig->cpu_timers[0]);
INIT_LIST_HEAD(&sig->cpu_timers[1]);
INIT_LIST_HEAD(&sig->cpu_timers[2]);
@@ -1040,7 +1040,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->utime = cputime_zero;
p->stime = cputime_zero;
- p->sched_time = 0;
+
#ifdef CONFIG_TASK_XACCT
p->rchar = 0; /* I/O counter: bytes read */
p->wchar = 0; /* I/O counter: bytes written */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 1de710e1837..b53c8fcd9d8 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -161,7 +161,7 @@ static inline cputime_t virt_ticks(struct task_struct *p)
}
static inline unsigned long long sched_ns(struct task_struct *p)
{
- return (p == current) ? current_sched_time(p) : p->sched_time;
+ return task_sched_runtime(p);
}
int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
@@ -246,10 +246,10 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
} while (t != p);
break;
case CPUCLOCK_SCHED:
- cpu->sched = p->signal->sched_time;
+ cpu->sched = p->signal->sum_sched_runtime;
/* Add in each other live thread. */
while ((t = next_thread(t)) != p) {
- cpu->sched += t->sched_time;
+ cpu->sched += t->se.sum_exec_runtime;
}
cpu->sched += sched_ns(p);
break;
@@ -422,7 +422,7 @@ int posix_cpu_timer_del(struct k_itimer *timer)
*/
static void cleanup_timers(struct list_head *head,
cputime_t utime, cputime_t stime,
- unsigned long long sched_time)
+ unsigned long long sum_exec_runtime)
{
struct cpu_timer_list *timer, *next;
cputime_t ptime = cputime_add(utime, stime);
@@ -451,10 +451,10 @@ static void cleanup_timers(struct list_head *head,
++head;
list_for_each_entry_safe(timer, next, head, entry) {
list_del_init(&timer->entry);
- if (timer->expires.sched < sched_time) {
+ if (timer->expires.sched < sum_exec_runtime) {
timer->expires.sched = 0;
} else {
- timer->expires.sched -= sched_time;
+ timer->expires.sched -= sum_exec_runtime;
}
}
}
@@ -467,7 +467,7 @@ static void cleanup_timers(struct list_head *head,
void posix_cpu_timers_exit(struct task_struct *tsk)
{
cleanup_timers(tsk->cpu_timers,
- tsk->utime, tsk->stime, tsk->sched_time);
+ tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
@@ -475,7 +475,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
cleanup_timers(tsk->signal->cpu_timers,
cputime_add(tsk->utime, tsk->signal->utime),
cputime_add(tsk->stime, tsk->signal->stime),
- tsk->sched_time + tsk->signal->sched_time);
+ tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
}
@@ -536,7 +536,7 @@ static void process_timer_rebalance(struct task_struct *p,
nsleft = max_t(unsigned long long, nsleft, 1);
do {
if (likely(!(t->flags & PF_EXITING))) {
- ns = t->sched_time + nsleft;
+ ns = t->se.sum_exec_runtime + nsleft;
if (t->it_sched_expires == 0 ||
t->it_sched_expires > ns) {
t->it_sched_expires = ns;
@@ -1004,7 +1004,7 @@ static void check_thread_timers(struct task_struct *tsk,
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || tsk->sched_time < t->expires.sched) {
+ if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
tsk->it_sched_expires = t->expires.sched;
break;
}
@@ -1024,7 +1024,7 @@ static void check_process_timers(struct task_struct *tsk,
int maxfire;
struct signal_struct *const sig = tsk->signal;
cputime_t utime, stime, ptime, virt_expires, prof_expires;
- unsigned long long sched_time, sched_expires;
+ unsigned long long sum_sched_runtime, sched_expires;
struct task_struct *t;
struct list_head *timers = sig->cpu_timers;
@@ -1044,12 +1044,12 @@ static void check_process_timers(struct task_struct *tsk,
*/
utime = sig->utime;
stime = sig->stime;
- sched_time = sig->sched_time;
+ sum_sched_runtime = sig->sum_sched_runtime;
t = tsk;
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
- sched_time += t->sched_time;
+ sum_sched_runtime += t->se.sum_exec_runtime;
t = next_thread(t);
} while (t != tsk);
ptime = cputime_add(utime, stime);
@@ -1090,7 +1090,7 @@ static void check_process_timers(struct task_struct *tsk,
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || sched_time < t->expires.sched) {
+ if (!--maxfire || sum_sched_runtime < t->expires.sched) {
sched_expires = t->expires.sched;
break;
}
@@ -1182,7 +1182,7 @@ static void check_process_timers(struct task_struct *tsk,
virt_left = cputime_sub(virt_expires, utime);
virt_left = cputime_div_non_zero(virt_left, nthreads);
if (sched_expires) {
- sched_left = sched_expires - sched_time;
+ sched_left = sched_expires - sum_sched_runtime;
do_div(sched_left, nthreads);
sched_left = max_t(unsigned long long, sched_left, 1);
} else {
@@ -1208,7 +1208,7 @@ static void check_process_timers(struct task_struct *tsk,
t->it_virt_expires = ticks;
}
- sched = t->sched_time + sched_left;
+ sched = t->se.sum_exec_runtime + sched_left;
if (sched_expires && (t->it_sched_expires == 0 ||
t->it_sched_expires > sched)) {
t->it_sched_expires = sched;
@@ -1300,7 +1300,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
(tsk->it_sched_expires == 0 ||
- tsk->sched_time < tsk->it_sched_expires))
+ tsk->se.sum_exec_runtime < tsk->it_sched_expires))
return;
#undef UNEXPIRED
diff --git a/kernel/sched.c b/kernel/sched.c
index 50e1a312269..9fbced64bfe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -16,13 +16,19 @@
* by Davide Libenzi, preemptible kernel bits by Robert Love.
* 2003-09-03 Interactivity tuning by Con Kolivas.
* 2004-04-02 Scheduler domains code by Nick Piggin
+ * 2007-04-15 Work begun on replacing all interactivity tuning with a
+ * fair scheduling design by Con Kolivas.
+ * 2007-05-05 Load balancing (smp-nice) and other improvements
+ * by Peter Williams
+ * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith
+ * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri
*/
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/init.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/smp_lock.h>
#include <asm/mmu_context.h>
@@ -53,9 +59,9 @@
#include <linux/kprobes.h>
#include <linux/delayacct.h>
#include <linux/reciprocal_div.h>
+#include <linux/unistd.h>
#include <asm/tlb.h>
-#include <asm/unistd.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@ -91,6 +97,9 @@ unsigned long long __attribute__((weak)) sched_clock(void)
#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ))
#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
+#define NICE_0_LOAD SCHED_LOAD_SCALE
+#define NICE_0_SHIFT SCHED_LOAD_SHIFT
+
/*
* These are the 'tuning knobs' of the scheduler:
*
@@ -100,87 +109,6 @@ unsigned long long __attribute__((weak)) sched_clock(void)
*/
#define MIN_TIMESLICE max(5 * HZ / 1000, 1)
#define DEF_TIMESLICE (100 * HZ / 1000)
-#define ON_RUNQUEUE_WEIGHT 30
-#define CHILD_PENALTY 95
-#define PARENT_PENALTY 100
-#define EXIT_WEIGHT 3
-#define PRIO_BONUS_RATIO 25
-#define MAX_BONUS (MAX_USER_PRIO * PRIO_BONUS_RATIO / 100)
-#define INTERACTIVE_DELTA 2
-#define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS)
-#define STARVATION_LIMIT (MAX_SLEEP_AVG)
-#define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG))
-
-/*
- * If a task is 'interactive' then we reinsert it in the active
- * array after it has expired its current timeslice. (it will not
- * continue to run immediately, it will still roundrobin with
- * other interactive tasks.)
- *
- * This part scales the interactivity limit depending on niceness.
- *
- * We scale it linearly, offset by the INTERACTIVE_DELTA delta.
- * Here are a few examples of different nice levels:
- *
- * TASK_INTERACTIVE(-20): [1,1,1,1,1,1,1,1,1,0,0]
- * TASK_INTERACTIVE(-10): [1,1,1,1,1,1,1,0,0,0,0]
- * TASK_INTERACTIVE( 0): [1,1,1,1,0,0,0,0,0,0,0]
- * TASK_INTERACTIVE( 10): [1,1,0,0,0,0,0,0,0,0,0]
- * TASK_INTERACTIVE( 19): [0,0,0,0,0,0,0,0,0,0,0]
- *
- * (the X axis represents the possible -5 ... 0 ... +5 dynamic
- * priority range a task can explore, a value of '1' means the
- * task is rated interactive.)
- *
- * Ie. nice +19 tasks can never get 'interactive' enough to be
- * reinserted into the active array. And only heavily CPU-hog nice -20
- * tasks will be expired. Default nice 0 tasks are somewhere between,
- * it takes some effort for them to get interactive, but it's not
- * too hard.
- */
-
-#define CURRENT_BONUS(p) \
- (NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
- MAX_SLEEP_AVG)
-
-#define GRANULARITY (10 * HZ / 1000 ? : 1)
-
-#ifdef CONFIG_SMP
-#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \
- (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
- num_online_cpus())
-#else
-#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \
- (1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
-#endif
-
-#define SCALE(v1,v1_max,v2_max) \
- (v1) * (v2_max) / (v1_max)
-
-#define DELTA(p) \
- (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
- INTERACTIVE_DELTA)
-
-#define TASK_INTERACTIVE(p) \
- ((p)->prio <= (p)->static_prio - DELTA(p))
-
-#define INTERACTIVE_SLEEP(p) \
- (JIFFIES_TO_NS(MAX_SLEEP_AVG * \
- (MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
-
-#define TASK_PREEMPTS_CURR(p, rq) \
- ((p)->prio < (rq)->curr->prio)
-
-#define SCALE_PRIO(x, prio) \
- max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
-
-static unsigned int static_prio_timeslice(int static_prio)
-{
- if (static_prio < NICE_TO_PRIO(0))
- return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
- else
- return SCALE_PRIO(DEF_TIMESLICE, static_prio);
-}
#ifdef CONFIG_SMP
/*
@@ -203,28 +131,87 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
}
#endif
+#define SCALE_PRIO(x, prio) \
+ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
+
/*
- * task_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
+ * static_prio_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
* to time slice values: [800ms ... 100ms ... 5ms]
- *
- * The higher a thread's priority, the bigger timeslices
- * it gets during one round of execution. But even the lowest
- * priority thread gets MIN_TIMESLICE worth of execution time.
*/
+static unsigned int static_prio_timeslice(int static_prio)
+{
+ if (static_prio == NICE_TO_PRIO(19))
+ return 1;
+
+ if (static_prio < NICE_TO_PRIO(0))
+ return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
+ else
+ return SCALE_PRIO(DEF_TIMESLICE, static_prio);
+}
+
+static inline int rt_policy(int policy)
+{
+ if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR))
+ return 1;
+ return 0;
+}
-static inline unsigned int task_timeslice(struct task_struct *p)
+static inline int task_has_rt_policy(struct task_struct *p)
{
- return static_prio_timeslice(p->static_prio);
+ return rt_policy(p->policy);
}
/*
- * These are the runqueue data structures:
+ * This is the priority-queue data structure of the RT scheduling class:
*/
+struct rt_prio_array {
+ DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
+ struct list_head queue[MAX_RT_PRIO];
+};
+
+struct load_stat {
+ struct load_weight load;
+ u64 load_update_start, load_update_last;
+ unsigned long delta_fair, delta_exec, delta_stat;
+};
+
+/* CFS-related fields in a runqueue */
+struct cfs_rq {
+ struct load_weight load;
+ unsigned long nr_running;
+
+ s64 fair_clock;
+ u64 exec_clock;
+ s64 wait_runtime;
+ u64 sleeper_bonus;
+ unsigned long wait_runtime_overruns, wait_runtime_underruns;
+
+ struct rb_root tasks_timeline;
+ struct rb_node *rb_leftmost;
+ struct rb_node *rb_load_balance_curr;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /* 'curr' points to currently running entity on this cfs_rq.
+ * It is set to NULL otherwise (i.e when none are currently running).
+ */
+ struct sched_entity *curr;
+ struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
-struct prio_array {
- unsigned int nr_active;
- DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
- struct list_head queue[MAX_PRIO];
+ /* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
+ * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
+ * (like users, containers etc.)
+ *
+ * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
+ * list is used during load balance.
+ */
+ struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
+#endif
+};
+
+/* Real-Time classes' related field in a runqueue: */
+struct rt_rq {
+ struct rt_prio_array active;
+ int rt_load_balance_idx;
+ struct list_head *rt_load_balance_head, *rt_load_balance_curr;
};
/*
@@ -235,22 +222,28 @@ struct prio_array {
* acquire operations must be ordered by ascending &runqueue.
*/
struct rq {
- spinlock_t lock;
+ spinlock_t lock; /* runqueue lock */
/*
* nr_running and cpu_load should be in the same cacheline because
* remote CPUs use both these fields when doing load calculation.
*/
unsigned long nr_running;
- unsigned long raw_weighted_load;
-#ifdef CONFIG_SMP
- unsigned long cpu_load[3];
+ #define CPU_LOAD_IDX_MAX 5
+ unsigned long cpu_load[CPU_LOAD_IDX_MAX];
unsigned char idle_at_tick;
#ifdef CONFIG_NO_HZ
unsigned char in_nohz_recently;
#endif
+ struct load_stat ls; /* capture load from *all* tasks on this cpu */
+ unsigned long nr_load_updates;
+ u64 nr_switches;
+
+ struct cfs_rq cfs;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct list_head leaf_cfs_rq_list; /* list of leaf cfs_rq on this cpu */
#endif
- unsigned long long nr_switches;
+ struct rt_rq rt;
/*
* This is part of a global counter where only the total sum
@@ -260,14 +253,18 @@ struct rq {
*/
unsigned long nr_uninterruptible;
- unsigned long expired_timestamp;
- /* Cached timestamp set by update_cpu_clock() */
- unsigned long long most_recent_timestamp;
struct task_struct *curr, *idle;
unsigned long next_balance;
struct mm_struct *prev_mm;
- struct prio_array *active, *expired, arrays[2];
- int best_expired_prio;
+
+ u64 clock, prev_clock_raw;
+ s64 clock_max_delta;
+
+ unsigned int clock_warps, clock_overflows;
+ unsigned int clock_unstable_events;
+
+ struct sched_class *load_balance_class;
+
atomic_t nr_iowait;
#ifdef CONFIG_SMP
@@ -307,6 +304,11 @@ struct rq {
static DEFINE_PER_CPU(struct rq, runqueues) ____cacheline_aligned_in_smp;
static DEFINE_MUTEX(sched_hotcpu_mutex);
+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
+{
+ rq->curr->sched_class->check_preempt_curr(rq, p);
+}
+
static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
@@ -317,6 +319,52 @@ static inline int cpu_of(struct rq *rq)
}
/*
+ * Per-runqueue clock, as finegrained as the platform can give us:
+ */
+static unsigned long long __rq_clock(struct rq *rq)
+{
+ u64 prev_raw = rq->prev_clock_raw;
+ u64 now = sched_clock();
+ s64 delta = now - prev_raw;
+ u64 clock = rq->clock;
+
+ /*
+ * Protect against sched_clock() occasionally going backwards:
+ */
+ if (unlikely(delta < 0)) {
+ clock++;
+ rq->clock_warps++;
+ } else {
+ /*
+ * Catch too large forward jumps too:
+ */
+ if (unlikely(delta > 2*TICK_NSEC)) {
+ clock++;
+ rq->clock_overflows++;
+ } else {
+ if (unlikely(delta > rq->clock_max_delta))
+ rq->clock_max_delta = delta;
+ clock += delta;
+ }
+ }
+
+ rq->prev_clock_raw = now;
+ rq->clock = clock;
+
+ return clock;
+}
+
+static inline unsigned long long rq_clock(struct rq *rq)
+{
+ int this_cpu = smp_processor_id();
+
+ if (this_cpu == cpu_of(rq))
+ return __rq_clock(rq);
+
+ return rq->clock;
+}
+
+/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
*
@@ -331,6 +379,18 @@ static inline int cpu_of(struct rq *rq)
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/* Change a task's ->cfs_rq if it moves across CPUs */
+static inline void set_task_cfs_rq(struct task_struct *p)
+{
+ p->se.cfs_rq = &task_rq(p)->cfs;
+}
+#else
+static inline void set_task_cfs_rq(struct task_struct *p)
+{
+}
+#endif
+
#ifndef prepare_arch_switch
# define prepare_arch_switch(next) do { } while (0)
#endif
@@ -460,134 +520,6 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
spin_unlock_irqrestore(&rq->lock, *flags);
}
-#ifdef CONFIG_SCHEDSTATS
-/*
- * bump this up when changing the output format or the meaning of an existing
- * format, so that tools can adapt (or abort)
- */
-#define SCHEDSTAT_VERSION 14
-
-static int show_schedstat(struct seq_file *seq, void *v)
-{
- int cpu;
-
- seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
- seq_printf(seq, "timestamp %lu\n", jiffies);
- for_each_online_cpu(cpu) {
- struct rq *rq = cpu_rq(cpu);
-#ifdef CONFIG_SMP
- struct sched_domain *sd;
- int dcnt = 0;
-#endif
-
- /* runqueue-specific stats */
- seq_printf(seq,
- "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
- cpu, rq->yld_both_empty,
- rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
- rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
- rq->ttwu_cnt, rq->ttwu_local,
- rq->rq_sched_info.cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
-
- seq_printf(seq, "\n");
-
-#ifdef CONFIG_SMP
- /* domain-specific stats */
- preempt_disable();
- for_each_domain(cpu, sd) {
- enum idle_type itype;
- char mask_str[NR_CPUS];
-
- cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
- seq_printf(seq, "domain%d %s", dcnt++, mask_str);
- for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
- itype++) {
- seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
- "%lu",
- sd->lb_cnt[itype],
- sd->lb_balanced[itype],
- sd->lb_failed[itype],
- sd->lb_imbalance[itype],
- sd->lb_gained[itype],
- sd->lb_hot_gained[itype],
- sd->lb_nobusyq[itype],
- sd->lb_nobusyg[itype]);
- }
- seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
- " %lu %lu %lu\n",
- sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
- sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
- sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
- sd->ttwu_wake_remote, sd->ttwu_move_affine,
- sd->ttwu_move_balance);
- }
- preempt_enable();
-#endif
- }
- return 0;
-}
-
-static int schedstat_open(struct inode *inode, struct file *file)
-{
- unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
- char *buf = kmalloc(size, GFP_KERNEL);
- struct seq_file *m;
- int res;
-
- if (!buf)
- return -ENOMEM;
- res = single_open(file, show_schedstat, NULL);
- if (!res) {
- m = file->private_data;
- m->buf = buf;
- m->size = size;
- } else
- kfree(buf);
- return res;
-}
-
-const struct file_operations proc_schedstat_operations = {
- .open = schedstat_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-/*
- * Expects runqueue lock to be held for atomicity of update
- */
-static inline void
-rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
-{
- if (rq) {
- rq->rq_sched_info.run_delay += delta_jiffies;
- rq->rq_sched_info.pcnt++;
- }
-}
-
-/*
- * Expects runqueue lock to be held for atomicity of update
- */
-static inline void
-rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
-{
- if (rq)
- rq->rq_sched_info.cpu_time += delta_jiffies;
-}
-# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
-# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
-#else /* !CONFIG_SCHEDSTATS */
-static inline void
-rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
-{}
-static inline void
-rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
-{}
-# define schedstat_inc(rq, field) do { } while (0)
-# define schedstat_add(rq, field, amt) do { } while (0)
-#endif
-
/*
* this_rq_lock - lock this runqueue and disable interrupts.
*/
@@ -603,177 +535,172 @@ static inline struct rq *this_rq_lock(void)
return rq;
}
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
/*
- * Called when a process is dequeued from the active array and given
- * the cpu. We should note that with the exception of interactive
- * tasks, the expired queue will become the active queue after the active
- * queue is empty, without explicitly dequeuing and requeuing tasks in the
- * expired queue. (Interactive tasks may be requeued directly to the
- * active queue, thus delaying tasks in the expired queue from running;
- * see scheduler_tick()).
- *
- * This function is only called from sched_info_arrive(), rather than
- * dequeue_task(). Even though a task may be queued and dequeued multiple
- * times as it is shuffled about, we're really interested in knowing how
- * long it was from the *first* time it was queued to the time that it
- * finally hit a cpu.
+ * CPU frequency is/was unstable - start new by setting prev_clock_raw:
*/
-static inline void sched_info_dequeued(struct task_struct *t)
+void sched_clock_unstable_event(void)
{
- t->sched_info.last_queued = 0;
+ unsigned long flags;
+ struct rq *rq;
+
+ rq = task_rq_lock(current, &flags);
+ rq->prev_clock_raw = sched_clock();
+ rq->clock_unstable_events++;
+ task_rq_unlock(rq, &flags);
}
/*
- * Called when a task finally hits the cpu. We can now calculate how
- * long it was waiting to run. We also note when it began so that we
- * can keep stats on how long its timeslice is.
+ * resched_task - mark a task 'to be rescheduled now'.
+ *
+ * On UP this means the setting of the need_resched flag, on SMP it
+ * might also involve a cross-CPU call to trigger the scheduler on
+ * the target CPU.
*/
-static void sched_info_arrive(struct task_struct *t)
+#ifdef CONFIG_SMP
+
+#ifndef tsk_is_polling
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+#endif
+
+static void resched_task(struct task_struct *p)
{
- unsigned long now = jiffies, delta_jiffies = 0;
+ int cpu;
+
+ assert_spin_locked(&task_rq(p)->lock);
+
+ if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+ return;
- if (t->sched_info.last_queued)
- delta_jiffies = now - t->sched_info.last_queued;
- sched_info_dequeued(t);
- t->sched_info.run_delay += delta_jiffies;
- t->sched_info.last_arrival = now;
- t->sched_info.pcnt++;
+ set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+
+ cpu = task_cpu(p);
+ if (cpu == smp_processor_id())
+ return;
- rq_sched_info_arrive(task_rq(t), delta_jiffies);
+ /* NEED_RESCHED must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(p))
+ smp_send_reschedule(cpu);
}
-/*
- * Called when a process is queued into either the active or expired
- * array. The time is noted and later used to determine how long we
- * had to wait for us to reach the cpu. Since the expired queue will
- * become the active queue after active queue is empty, without dequeuing
- * and requeuing any tasks, we are interested in queuing to either. It
- * is unusual but not impossible for tasks to be dequeued and immediately
- * requeued in the same or another array: this can happen in sched_yield(),
- * set_user_nice(), and even load_balance() as it moves tasks from runqueue
- * to runqueue.
- *
- * This function is only called from enqueue_task(), but also only updates
- * the timestamp if it is already not set. It's assumed that
- * sched_info_dequeued() will clear that stamp when appropriate.
- */
-static inline void sched_info_queued(struct task_struct *t)
+static void resched_cpu(int cpu)
{
- if (unlikely(sched_info_on()))
- if (!t->sched_info.last_queued)
- t->sched_info.last_queued = jiffies;
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&rq->lock, flags))
+ return;
+ resched_task(cpu_curr(cpu));
+ spin_unlock_irqrestore(&rq->lock, flags);
}
+#else
+static inline void resched_task(struct task_struct *p)
+{
+ assert_spin_locked(&task_rq(p)->lock);
+ set_tsk_need_resched(p);
+}
+#endif
-/*
- * Called when a process ceases being the active-running process, either
- * voluntarily or involuntarily. Now we can calculate how long we ran.
- */
-static inline void sched_info_depart(struct task_struct *t)
+static u64 div64_likely32(u64 divident, unsigned long divisor)
{
- unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
+#if BITS_PER_LONG == 32
+ if (likely(divident <= 0xffffffffULL))
+ return (u32)divident / divisor;
+ do_div(divident, divisor);
- t->sched_info.cpu_time += delta_jiffies;
- rq_sched_info_depart(task_rq(t), delta_jiffies);
+ return divident;
+#else
+ return divident / divisor;
+#endif
}
-/*
- * Called when tasks are switched involuntarily due, typically, to expiring
- * their time slice. (This may also be called when switching to or from
- * the idle task.) We are only called when prev != next.
- */
-static inline void
-__sched_info_switch(struct task_struct *prev, struct task_struct *next)
+#if BITS_PER_LONG == 32
+# define WMULT_CONST (~0UL)
+#else
+# define WMULT_CONST (1UL << 32)
+#endif
+
+#define WMULT_SHIFT 32
+
+static inline unsigned long
+calc_delta_mine(unsigned long delta_exec, unsigned long weight,
+ struct load_weight *lw)
{
- struct rq *rq = task_rq(prev);
+ u64 tmp;
+ if (unlikely(!lw->inv_weight))
+ lw->inv_weight = WMULT_CONST / lw->weight;
+
+ tmp = (u64)delta_exec * weight;
/*
- * prev now departs the cpu. It's not interesting to record
- * stats about how efficient we were at scheduling the idle
- * process, however.
+ * Check whether we'd overflow the 64-bit multiplication:
*/
- if (prev != rq->idle)
- sched_info_depart(prev);
+ if (unlikely(tmp > WMULT_CONST)) {
+ tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight)
+ >> (WMULT_SHIFT/2);
+ } else {
+ tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
+ }
- if (next != rq->idle)
- sched_info_arrive(next);
-}
-static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
-{
- if (unlikely(sched_info_on()))
- __sched_info_switch(prev, next);
+ return (unsigned long)min(tmp, (u64)sysctl_sched_runtime_limit);
}
-#else
-#define sched_info_queued(t) do { } while (0)
-#define sched_info_switch(t, next) do { } while (0)
-#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
-/*
- * Adding/removing a task to/from a priority array:
- */
-static void dequeue_task(struct task_struct *p, struct prio_array *array)
+static inline unsigned long
+calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
{
- array->nr_active--;
- list_del(&p->run_list);
- if (list_empty(array->queue + p->prio))
- __clear_bit(p->prio, array->bitmap);
+ return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
}
-static void enqueue_task(struct task_struct *p, struct prio_array *array)
+static void update_load_add(struct load_weight *lw, unsigned long inc)
{
- sched_info_queued(p);
- list_add_tail(&p->run_list, array->queue + p->prio);
- __set_bit(p->prio, array->bitmap);
- array->nr_active++;
- p->array = array;
+ lw->weight += inc;
+ lw->inv_weight = 0;
}
-/*
- * Put task to the end of the run list without the overhead of dequeue
- * followed by enqueue.
- */
-static void requeue_task(struct task_struct *p, struct prio_array *array)
+static void update_load_sub(struct load_weight *lw, unsigned long dec)
{
- list_move_tail(&p->run_list, array->queue + p->prio);
+ lw->weight -= dec;
+ lw->inv_weight = 0;
}
-static inline void
-enqueue_task_head(struct task_struct *p, struct prio_array *array)
+static void __update_curr_load(struct rq *rq, struct load_stat *ls)
{
- list_add(&p->run_list, array->queue + p->prio);
- __set_bit(p->prio, array->bitmap);
- array->nr_active++;
- p->array = array;
+ if (rq->curr != rq->idle && ls->load.weight) {
+ ls->delta_exec += ls->delta_stat;
+ ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
+ ls->delta_stat = 0;
+ }
}
/*
- * __normal_prio - return the priority that is based on the static
- * priority but is modified by bonuses/penalties.
+ * Update delta_exec, delta_fair fields for rq.
*
- * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
- * into the -5 ... 0 ... +5 bonus/penalty range.
+ * delta_fair clock advances at a rate inversely proportional to
+ * total load (rq->ls.load.weight) on the runqueue, while
+ * delta_exec advances at the same rate as wall-clock (provided
+ * cpu is not idle).
*
- * We use 25% of the full 0...39 priority range so that:
+ * delta_exec / delta_fair is a measure of the (smoothened) load on this
+ * runqueue over any given interval. This (smoothened) load is used
+ * during load balance.
*
- * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
- * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
- *
- * Both properties are important to certain workloads.
+ * This function is called /before/ updating rq->ls.load
+ * and when switching tasks.
*/
-
-static inline int __normal_prio(struct task_struct *p)
+static void update_curr_load(struct rq *rq, u64 now)
{
- int bonus, prio;
-
- bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
+ struct load_stat *ls = &rq->ls;
+ u64 start;
- prio = p->static_prio - bonus;
- if (prio < MAX_RT_PRIO)
- prio = MAX_RT_PRIO;
- if (prio > MAX_PRIO-1)
- prio = MAX_PRIO-1;
- return prio;
+ start = ls->load_update_start;
+ ls->load_update_start = now;
+ ls->delta_stat += now - start;
+ /*
+ * Stagger updates to ls->delta_fair. Very frequent updates
+ * can be expensive.
+ */
+ if (ls->delta_stat >= sysctl_sched_stat_granularity)
+ __update_curr_load(rq, ls);
}
/*
@@ -791,53 +718,146 @@ static inline int __normal_prio(struct task_struct *p)
* this code will need modification
*/
#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE
-#define LOAD_WEIGHT(lp) \
+#define load_weight(lp) \
(((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
#define PRIO_TO_LOAD_WEIGHT(prio) \
- LOAD_WEIGHT(static_prio_timeslice(prio))
+ load_weight(static_prio_timeslice(prio))
#define RTPRIO_TO_LOAD_WEIGHT(rp) \
- (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
+ (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + load_weight(rp))
-static void set_load_weight(struct task_struct *p)
-{
- if (has_rt_policy(p)) {
-#ifdef CONFIG_SMP
- if (p == task_rq(p)->migration_thread)
- /*
- * The migration thread does the actual balancing.
- * Giving its load any weight will skew balancing
- * adversely.
- */
- p->load_weight = 0;
- else
-#endif
- p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
- } else
- p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
-}
+#define WEIGHT_IDLEPRIO 2
+#define WMULT_IDLEPRIO (1 << 31)
+
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
+ * it's +10% CPU usage.
+ */
+static const int prio_to_weight[40] = {
+/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
+/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280,
+/* 0 */ NICE_0_LOAD /* 1024 */,
+/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137,
+/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
+};
+
+static const u32 prio_to_wmult[40] = {
+ 48356, 60446, 75558, 94446, 118058, 147573,
+ 184467, 230589, 288233, 360285, 450347,
+ 562979, 703746, 879575, 1099582, 1374389,
+ 717986, 2147483, 2684354, 3355443, 4194304,
+ 244160, 6557201, 8196502, 10250518, 12782640,
+ 16025997, 19976592, 24970740, 31350126, 39045157,
+ 49367440, 61356675, 76695844, 95443717, 119304647,
+ 148102320, 186737708, 238609294, 286331153,
+};
static inline void
-inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
+inc_load(struct rq *rq, const struct task_struct *p, u64 now)
{
- rq->raw_weighted_load += p->load_weight;
+ update_curr_load(rq, now);
+ update_load_add(&rq->ls.load, p->se.load.weight);
}
static inline void
-dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
+dec_load(struct rq *rq, const struct task_struct *p, u64 now)
{
- rq->raw_weighted_load -= p->load_weight;
+ update_curr_load(rq, now);
+ update_load_sub(&rq->ls.load, p->se.load.weight);
}
-static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
+static inline void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
{
rq->nr_running++;
- inc_raw_weighted_load(rq, p);
+ inc_load(rq, p, now);
}
-static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
+static inline void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
{
rq->nr_running--;
- dec_raw_weighted_load(rq, p);
+ dec_load(rq, p, now);
+}
+
+static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
+
+/*
+ * runqueue iterator, to support SMP load-balancing between different
+ * scheduling classes, without having to expose their internal data
+ * structures to the load-balancing proper:
+ */
+struct rq_iterator {
+ void *arg;
+ struct task_struct *(*start)(void *);
+ struct task_struct *(*next)(void *);
+};
+
+static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, unsigned long *load_moved,
+ int this_best_prio, int best_prio, int best_prio_seen,
+ struct rq_iterator *iterator);
+
+#include "sched_stats.h"
+#include "sched_rt.c"
+#include "sched_fair.c"
+#include "sched_idletask.c"
+#ifdef CONFIG_SCHED_DEBUG
+# include "sched_debug.c"
+#endif
+
+#define sched_class_highest (&rt_sched_class)
+
+static void set_load_weight(struct task_struct *p)
+{
+ task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
+ p->se.wait_runtime = 0;
+
+ if (task_has_rt_policy(p)) {
+ p->se.load.weight = prio_to_weight[0] * 2;
+ p->se.load.inv_weight = prio_to_wmult[0] >> 1;
+ return;
+ }
+
+ /*
+ * SCHED_IDLE tasks get minimal weight:
+ */
+ if (p->policy == SCHED_IDLE) {
+ p->se.load.weight = WEIGHT_IDLEPRIO;
+ p->se.load.inv_weight = WMULT_IDLEPRIO;
+ return;
+ }
+
+ p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
+ p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
+}
+
+static void
+enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+{
+ sched_info_queued(p);
+ p->sched_class->enqueue_task(rq, p, wakeup, now);
+ p->se.on_rq = 1;
+}
+
+static void
+dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+{
+ p->sched_class->dequeue_task(rq, p, sleep, now);
+ p->se.on_rq = 0;
+}
+
+/*
+ * __normal_prio - return the priority that is based on the static prio
+ */
+static inline int __normal_prio(struct task_struct *p)
+{
+ return p->static_prio;
}
/*
@@ -851,7 +871,7 @@ static inline int normal_prio(struct task_struct *p)
{
int prio;
- if (has_rt_policy(p))
+ if (task_has_rt_policy(p))
prio = MAX_RT_PRIO-1 - p->rt_priority;
else
prio = __normal_prio(p);
@@ -879,222 +899,47 @@ static int effective_prio(struct task_struct *p)
}
/*
- * __activate_task - move a task to the runqueue.
- */
-static void __activate_task(struct task_struct *p, struct rq *rq)
-{
- struct prio_array *target = rq->active;
-
- if (batch_task(p))
- target = rq->expired;
- enqueue_task(p, target);
- inc_nr_running(p, rq);
-}
-
-/*
- * __activate_idle_task - move idle task to the _front_ of runqueue.
- */
-static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
-{
- enqueue_task_head(p, rq->active);
- inc_nr_running(p, rq);
-}
-
-/*
- * Recalculate p->normal_prio and p->prio after having slept,
- * updating the sleep-average too:
+ * activate_task - move a task to the runqueue.
*/
-static int recalc_task_prio(struct task_struct *p, unsigned long long now)
+static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
- /* Caller must always ensure 'now >= p->timestamp' */
- unsigned long sleep_time = now - p->timestamp;
-
- if (batch_task(p))
- sleep_time = 0;
-
- if (likely(sleep_time > 0)) {
- /*
- * This ceiling is set to the lowest priority that would allow
- * a task to be reinserted into the active array on timeslice
- * completion.
- */
- unsigned long ceiling = INTERACTIVE_SLEEP(p);
-
- if (p->mm && sleep_time > ceiling && p->sleep_avg < ceiling) {
- /*
- * Prevents user tasks from achieving best priority
- * with one single large enough sleep.
- */
- p->sleep_avg = ceiling;
- /*
- * Using INTERACTIVE_SLEEP() as a ceiling places a
- * nice(0) task 1ms sleep away from promotion, and
- * gives it 700ms to round-robin with no chance of
- * being demoted. This is more than generous, so
- * mark this sleep as non-interactive to prevent the
- * on-runqueue bonus logic from intervening should
- * this task not receive cpu immediately.
- */
- p->sleep_type = SLEEP_NONINTERACTIVE;
- } else {
- /*
- * Tasks waking from uninterruptible sleep are
- * limited in their sleep_avg rise as they
- * are likely to be waiting on I/O
- */
- if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
- if (p->sleep_avg >= ceiling)
- sleep_time = 0;
- else if (p->sleep_avg + sleep_time >=
- ceiling) {
- p->sleep_avg = ceiling;
- sleep_time = 0;
- }
- }
+ u64 now = rq_clock(rq);
- /*
- * This code gives a bonus to interactive tasks.
- *
- * The boost works by updating the 'average sleep time'
- * value here, based on ->timestamp. The more time a
- * task spends sleeping, the higher the average gets -
- * and the higher the priority boost gets as well.
- */
- p->sleep_avg += sleep_time;
-
- }
- if (p->sleep_avg > NS_MAX_SLEEP_AVG)
- p->sleep_avg = NS_MAX_SLEEP_AVG;
- }
+ if (p->state == TASK_UNINTERRUPTIBLE)
+ rq->nr_uninterruptible--;
- return effective_prio(p);
+ enqueue_task(rq, p, wakeup, now);
+ inc_nr_running(p, rq, now);
}
/*
- * activate_task - move a task to the runqueue and do priority recalculation
- *
- * Update all the scheduling statistics stuff. (sleep average
- * calculation, priority modifiers, etc.)
+ * activate_idle_task - move idle task to the _front_ of runqueue.
*/
-static void activate_task(struct task_struct *p, struct rq *rq, int local)
+static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
{
- unsigned long long now;
+ u64 now = rq_clock(rq);
- if (rt_task(p))
- goto out;
-
- now = sched_clock();
-#ifdef CONFIG_SMP
- if (!local) {
- /* Compensate for drifting sched_clock */
- struct rq *this_rq = this_rq();
- now = (now - this_rq->most_recent_timestamp)
- + rq->most_recent_timestamp;
- }
-#endif
-
- /*
- * Sleep time is in units of nanosecs, so shift by 20 to get a
- * milliseconds-range estimation of the amount of time that the task
- * spent sleeping:
- */
- if (unlikely(prof_on == SLEEP_PROFILING)) {
- if (p->state == TASK_UNINTERRUPTIBLE)
- profile_hits(SLEEP_PROFILING, (void *)get_wchan(p),
- (now - p->timestamp) >> 20);
- }
-
- p->prio = recalc_task_prio(p, now);
+ if (p->state == TASK_UNINTERRUPTIBLE)
+ rq->nr_uninterruptible--;
- /*
- * This checks to make sure it's not an uninterruptible task
- * that is now waking up.
- */
- if (p->sleep_type == SLEEP_NORMAL) {
- /*
- * Tasks which were woken up by interrupts (ie. hw events)
- * are most likely of interactive nature. So we give them
- * the credit of extending their sleep time to the period
- * of time they spend on the runqueue, waiting for execution
- * on a CPU, first time around:
- */
- if (in_interrupt())
- p->sleep_type = SLEEP_INTERRUPTED;
- else {
- /*
- * Normal first-time wakeups get a credit too for
- * on-runqueue time, but it will be weighted down:
- */
- p->sleep_type = SLEEP_INTERACTIVE;
- }
- }
- p->timestamp = now;
-out:
- __activate_task(p, rq);
+ enqueue_task(rq, p, 0, now);
+ inc_nr_running(p, rq, now);
}
/*
* deactivate_task - remove a task from the runqueue.
*/
-static void deactivate_task(struct task_struct *p, struct rq *rq)
-{
- dec_nr_running(p, rq);
- dequeue_task(p, p->array);
- p->array = NULL;
-}
-
-/*
- * resched_task - mark a task 'to be rescheduled now'.
- *
- * On UP this means the setting of the need_resched flag, on SMP it
- * might also involve a cross-CPU call to trigger the scheduler on
- * the target CPU.
- */
-#ifdef CONFIG_SMP
-
-#ifndef tsk_is_polling
-#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
-#endif
-
-static void resched_task(struct task_struct *p)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
{
- int cpu;
+ u64 now = rq_clock(rq);
- assert_spin_locked(&task_rq(p)->lock);
+ if (p->state == TASK_UNINTERRUPTIBLE)
+ rq->nr_uninterruptible++;
- if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
- return;
-
- set_tsk_thread_flag(p, TIF_NEED_RESCHED);
-
- cpu = task_cpu(p);
- if (cpu == smp_processor_id())
- return;
-
- /* NEED_RESCHED must be visible before we test polling */
- smp_mb();
- if (!tsk_is_polling(p))
- smp_send_reschedule(cpu);
+ dequeue_task(rq, p, sleep, now);
+ dec_nr_running(p, rq, now);
}
-static void resched_cpu(int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
- unsigned long flags;
-
- if (!spin_trylock_irqsave(&rq->lock, flags))
- return;
- resched_task(cpu_curr(cpu));
- spin_unlock_irqrestore(&rq->lock, flags);
-}
-#else
-static inline void resched_task(struct task_struct *p)
-{
- assert_spin_locked(&task_rq(p)->lock);
- set_tsk_need_resched(p);
-}
-#endif
-
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
@@ -1107,10 +952,42 @@ inline int task_curr(const struct task_struct *p)
/* Used instead of source_load when we know the type == 0 */
unsigned long weighted_cpuload(const int cpu)
{
- return cpu_rq(cpu)->raw_weighted_load;
+ return cpu_rq(cpu)->ls.load.weight;
+}
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_SMP
+ task_thread_info(p)->cpu = cpu;
+ set_task_cfs_rq(p);
+#endif
}
#ifdef CONFIG_SMP
+
+void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+{
+ int old_cpu = task_cpu(p);
+ struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
+ u64 clock_offset, fair_clock_offset;
+
+ clock_offset = old_rq->clock - new_rq->clock;
+ fair_clock_offset = old_rq->cfs.fair_clock -
+ new_rq->cfs.fair_clock;
+ if (p->se.wait_start)
+ p->se.wait_start -= clock_offset;
+ if (p->se.wait_start_fair)
+ p->se.wait_start_fair -= fair_clock_offset;
+ if (p->se.sleep_start)
+ p->se.sleep_start -= clock_offset;
+ if (p->se.block_start)
+ p->se.block_start -= clock_offset;
+ if (p->se.sleep_start_fair)
+ p->se.sleep_start_fair -= fair_clock_offset;
+
+ __set_task_cpu(p, new_cpu);
+}
+
struct migration_req {
struct list_head list;
@@ -1133,7 +1010,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
* If the task is not on a runqueue (and not running), then
* it is sufficient to simply update the task's cpu field.
*/
- if (!p->array && !task_running(rq, p)) {
+ if (!p->se.on_rq && !task_running(rq, p)) {
set_task_cpu(p, dest_cpu);
return 0;
}
@@ -1158,9 +1035,8 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
void wait_task_inactive(struct task_struct *p)
{
unsigned long flags;
+ int running, on_rq;
struct rq *rq;
- struct prio_array *array;
- int running;
repeat:
/*
@@ -1192,7 +1068,7 @@ repeat:
*/
rq = task_rq_lock(p, &flags);
running = task_running(rq, p);
- array = p->array;
+ on_rq = p->se.on_rq;
task_rq_unlock(rq, &flags);
/*
@@ -1215,7 +1091,7 @@ repeat:
* running right now), it's preempted, and we should
* yield - it could be a while.
*/
- if (unlikely(array)) {
+ if (unlikely(on_rq)) {
yield();
goto repeat;
}
@@ -1261,11 +1137,12 @@ void kick_process(struct task_struct *p)
static inline unsigned long source_load(int cpu, int type)
{
struct rq *rq = cpu_rq(cpu);
+ unsigned long total = weighted_cpuload(cpu);
if (type == 0)
- return rq->raw_weighted_load;
+ return total;
- return min(rq->cpu_load[type-1], rq->raw_weighted_load);
+ return min(rq->cpu_load[type-1], total);
}
/*
@@ -1275,11 +1152,12 @@ static inline unsigned long source_load(int cpu, int type)
static inline unsigned long target_load(int cpu, int type)
{
struct rq *rq = cpu_rq(cpu);
+ unsigned long total = weighted_cpuload(cpu);
if (type == 0)
- return rq->raw_weighted_load;
+ return total;
- return max(rq->cpu_load[type-1], rq->raw_weighted_load);
+ return max(rq->cpu_load[type-1], total);
}
/*
@@ -1288,9 +1166,10 @@ static inline unsigned long target_load(int cpu, int type)
static inline unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
+ unsigned long total = weighted_cpuload(cpu);
unsigned long n = rq->nr_running;
- return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
+ return n ? total / n : SCHED_LOAD_SCALE;
}
/*
@@ -1392,9 +1271,9 @@ static int sched_balance_self(int cpu, int flag)
struct sched_domain *tmp, *sd = NULL;
for_each_domain(cpu, tmp) {
- /*
- * If power savings logic is enabled for a domain, stop there.
- */
+ /*
+ * If power savings logic is enabled for a domain, stop there.
+ */
if (tmp->flags & SD_POWERSAVINGS_BALANCE)
break;
if (tmp->flags & flag)
@@ -1477,9 +1356,9 @@ static int wake_idle(int cpu, struct task_struct *p)
if (idle_cpu(i))
return i;
}
- }
- else
+ } else {
break;
+ }
}
return cpu;
}
@@ -1521,7 +1400,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (!(old_state & state))
goto out;
- if (p->array)
+ if (p->se.on_rq)
goto out_running;
cpu = task_cpu(p);
@@ -1576,11 +1455,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
* of the current CPU:
*/
if (sync)
- tl -= current->load_weight;
+ tl -= current->se.load.weight;
if ((tl <= load &&
tl + target_load(cpu, idx) <= tl_per_task) ||
- 100*(tl + p->load_weight) <= imbalance*load) {
+ 100*(tl + p->se.load.weight) <= imbalance*load) {
/*
* This domain has SD_WAKE_AFFINE and
* p is cache cold in this domain, and
@@ -1614,7 +1493,7 @@ out_set_cpu:
old_state = p->state;
if (!(old_state & state))
goto out;
- if (p->array)
+ if (p->se.on_rq)
goto out_running;
this_cpu = smp_processor_id();
@@ -1623,25 +1502,7 @@ out_set_cpu:
out_activate:
#endif /* CONFIG_SMP */
- if (old_state == TASK_UNINTERRUPTIBLE) {
- rq->nr_uninterruptible--;
- /*
- * Tasks on involuntary sleep don't earn
- * sleep_avg beyond just interactive state.
- */
- p->sleep_type = SLEEP_NONINTERACTIVE;
- } else
-
- /*
- * Tasks that have marked their sleep as noninteractive get
- * woken up with their sleep average not weighted in an
- * interactive way.
- */
- if (old_state & TASK_NONINTERACTIVE)
- p->sleep_type = SLEEP_NONINTERACTIVE;
-
-
- activate_task(p, rq, cpu == this_cpu);
+ activate_task(rq, p, 1);
/*
* Sync wakeups (i.e. those types of wakeups where the waker
* has indicated that it will leave the CPU in short order)
@@ -1650,10 +1511,8 @@ out_activate:
* the waker guarantees that the freshly woken up task is going
* to be considered on this CPU.)
*/
- if (!sync || cpu != this_cpu) {
- if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
- }
+ if (!sync || cpu != this_cpu)
+ check_preempt_curr(rq, p);
success = 1;
out_running:
@@ -1676,19 +1535,36 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state)
return try_to_wake_up(p, state, 0);
}
-static void task_running_tick(struct rq *rq, struct task_struct *p);
/*
* Perform scheduler related setup for a newly forked process p.
* p is forked by current.
- */
-void fastcall sched_fork(struct task_struct *p, int clone_flags)
-{
- int cpu = get_cpu();
+ *
+ * __sched_fork() is basic setup used by init_idle() too:
+ */
+static void __sched_fork(struct task_struct *p)
+{
+ p->se.wait_start_fair = 0;
+ p->se.wait_start = 0;
+ p->se.exec_start = 0;
+ p->se.sum_exec_runtime = 0;
+ p->se.delta_exec = 0;
+ p->se.delta_fair_run = 0;
+ p->se.delta_fair_sleep = 0;
+ p->se.wait_runtime = 0;
+ p->se.sum_wait_runtime = 0;
+ p->se.sum_sleep_runtime = 0;
+ p->se.sleep_start = 0;
+ p->se.sleep_start_fair = 0;
+ p->se.block_start = 0;
+ p->se.sleep_max = 0;
+ p->se.block_max = 0;
+ p->se.exec_max = 0;
+ p->se.wait_max = 0;
+ p->se.wait_runtime_overruns = 0;
+ p->se.wait_runtime_underruns = 0;
-#ifdef CONFIG_SMP
- cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
-#endif
- set_task_cpu(p, cpu);
+ INIT_LIST_HEAD(&p->run_list);
+ p->se.on_rq = 0;
/*
* We mark the process as running here, but have not actually
@@ -1697,16 +1573,29 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
* event cannot wake it up and insert it on the runqueue either.
*/
p->state = TASK_RUNNING;
+}
+
+/*
+ * fork()/clone()-time setup:
+ */
+void sched_fork(struct task_struct *p, int clone_flags)
+{
+ int cpu = get_cpu();
+
+ __sched_fork(p);
+
+#ifdef CONFIG_SMP
+ cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
+#endif
+ __set_task_cpu(p, cpu);
/*
* Make sure we do not leak PI boosting priority to the child:
*/
p->prio = current->normal_prio;
- INIT_LIST_HEAD(&p->run_list);
- p->array = NULL;
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
- if (unlikely(sched_info_on()))
+ if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
@@ -1716,34 +1605,16 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
/* Want to start with kernel preemption disabled. */
task_thread_info(p)->preempt_count = 1;
#endif
- /*
- * Share the timeslice between parent and child, thus the
- * total amount of pending timeslices in the system doesn't change,
- * resulting in more scheduling fairness.
- */
- local_irq_disable();
- p->time_slice = (current->time_slice + 1) >> 1;
- /*
- * The remainder of the first timeslice might be recovered by
- * the parent if the child exits early enough.
- */
- p->first_time_slice = 1;
- current->time_slice >>= 1;
- p->timestamp = sched_clock();
- if (unlikely(!current->time_slice)) {
- /*
- * This case is rare, it happens when the parent has only
- * a single jiffy left from its timeslice. Taking the
- * runqueue lock is not a problem.
- */
- current->time_slice = 1;
- task_running_tick(cpu_rq(cpu), current);
- }
- local_irq_enable();
put_cpu();
}
/*
+ * After fork, child runs first. (default) If set to 0 then
+ * parent will (try to) run first.
+ */
+unsigned int __read_mostly sysctl_sched_child_runs_first = 1;
+
+/*
* wake_up_new_task - wake up a newly created task for the first time.
*
* This function will do some initial scheduler statistics housekeeping
@@ -1752,107 +1623,27 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
*/
void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
- struct rq *rq, *this_rq;
unsigned long flags;
- int this_cpu, cpu;
+ struct rq *rq;
+ int this_cpu;
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_RUNNING);
- this_cpu = smp_processor_id();
- cpu = task_cpu(p);
-
- /*
- * We decrease the sleep average of forking parents
- * and children as well, to keep max-interactive tasks
- * from forking tasks that are max-interactive. The parent
- * (current) is done further down, under its lock.
- */
- p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
- CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
+ this_cpu = smp_processor_id(); /* parent's CPU */
p->prio = effective_prio(p);
- if (likely(cpu == this_cpu)) {
- if (!(clone_flags & CLONE_VM)) {
- /*
- * The VM isn't cloned, so we're in a good position to
- * do child-runs-first in anticipation of an exec. This
- * usually avoids a lot of COW overhead.
- */
- if (unlikely(!current->array))
- __activate_task(p, rq);
- else {
- p->prio = current->prio;
- p->normal_prio = current->normal_prio;
- list_add_tail(&p->run_list, &current->run_list);
- p->array = current->array;
- p->array->nr_active++;
- inc_nr_running(p, rq);
- }
- set_need_resched();
- } else
- /* Run child last */
- __activate_task(p, rq);
- /*
- * We skip the following code due to cpu == this_cpu
- *
- * task_rq_unlock(rq, &flags);
- * this_rq = task_rq_lock(current, &flags);
- */
- this_rq = rq;
+ if (!sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) ||
+ task_cpu(p) != this_cpu || !current->se.on_rq) {
+ activate_task(rq, p, 0);
} else {
- this_rq = cpu_rq(this_cpu);
-
/*
- * Not the local CPU - must adjust timestamp. This should
- * get optimised away in the !CONFIG_SMP case.
+ * Let the scheduling class do new task startup
+ * management (if any):
*/
- p->timestamp = (p->timestamp - this_rq->most_recent_timestamp)
- + rq->most_recent_timestamp;
- __activate_task(p, rq);
- if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
-
- /*
- * Parent and child are on different CPUs, now get the
- * parent runqueue to update the parent's ->sleep_avg:
- */
- task_rq_unlock(rq, &flags);
- this_rq = task_rq_lock(current, &flags);
- }
- current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) *
- PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
- task_rq_unlock(this_rq, &flags);
-}
-
-/*
- * Potentially available exiting-child timeslices are
- * retrieved here - this way the parent does not get
- * penalized for creating too many threads.
- *
- * (this cannot be used to 'generate' timeslices
- * artificially, because any timeslice recovered here
- * was given away by the parent in the first place.)
- */
-void fastcall sched_exit(struct task_struct *p)
-{
- unsigned long flags;
- struct rq *rq;
-
- /*
- * If the child was a (relative-) CPU hog then decrease
- * the sleep_avg of the parent as well.
- */
- rq = task_rq_lock(p->parent, &flags);
- if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) {
- p->parent->time_slice += p->time_slice;
- if (unlikely(p->parent->time_slice > task_timeslice(p)))
- p->parent->time_slice = task_timeslice(p);
+ p->sched_class->task_new(rq, p);
}
- if (p->sleep_avg < p->parent->sleep_avg)
- p->parent->sleep_avg = p->parent->sleep_avg /
- (EXIT_WEIGHT + 1) * EXIT_WEIGHT + p->sleep_avg /
- (EXIT_WEIGHT + 1);
+ check_preempt_curr(rq, p);
task_rq_unlock(rq, &flags);
}
@@ -1917,7 +1708,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
/*
* Remove function-return probe instances associated with this
* task and put them back on the free list.
- */
+ */
kprobe_flush_task(prev);
put_task_struct(prev);
}
@@ -1945,13 +1736,15 @@ asmlinkage void schedule_tail(struct task_struct *prev)
* context_switch - switch to the new MM and the new
* thread's register state.
*/
-static inline struct task_struct *
+static inline void
context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
- struct mm_struct *mm = next->mm;
- struct mm_struct *oldmm = prev->active_mm;
+ struct mm_struct *mm, *oldmm;
+ prepare_task_switch(rq, next);
+ mm = next->mm;
+ oldmm = prev->active_mm;
/*
* For paravirt, this is coupled with an exit in switch_to to
* combine the page table reload and the switch backend into
@@ -1959,16 +1752,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
arch_enter_lazy_cpu_mode();
- if (!mm) {
+ if (unlikely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);
- if (!prev->mm) {
+ if (unlikely(!prev->mm)) {
prev->active_mm = NULL;
- WARN_ON(rq->prev_mm);
rq->prev_mm = oldmm;
}
/*
@@ -1984,7 +1776,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
- return prev;
+ barrier();
+ /*
+ * this_rq must be evaluated again because prev may have moved
+ * CPUs since it called schedule(), thus the 'rq' on its stack
+ * frame will be invalid.
+ */
+ finish_task_switch(this_rq(), prev);
}
/*
@@ -2057,17 +1855,65 @@ unsigned long nr_active(void)
return running + uninterruptible;
}
-#ifdef CONFIG_SMP
-
/*
- * Is this task likely cache-hot:
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC).
*/
-static inline int
-task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
+static void update_cpu_load(struct rq *this_rq)
{
- return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
+ u64 fair_delta64, exec_delta64, idle_delta64, sample_interval64, tmp64;
+ unsigned long total_load = this_rq->ls.load.weight;
+ unsigned long this_load = total_load;
+ struct load_stat *ls = &this_rq->ls;
+ u64 now = __rq_clock(this_rq);
+ int i, scale;
+
+ this_rq->nr_load_updates++;
+ if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD)))
+ goto do_avg;
+
+ /* Update delta_fair/delta_exec fields first */
+ update_curr_load(this_rq, now);
+
+ fair_delta64 = ls->delta_fair + 1;
+ ls->delta_fair = 0;
+
+ exec_delta64 = ls->delta_exec + 1;
+ ls->delta_exec = 0;
+
+ sample_interval64 = now - ls->load_update_last;
+ ls->load_update_last = now;
+
+ if ((s64)sample_interval64 < (s64)TICK_NSEC)
+ sample_interval64 = TICK_NSEC;
+
+ if (exec_delta64 > sample_interval64)
+ exec_delta64 = sample_interval64;
+
+ idle_delta64 = sample_interval64 - exec_delta64;
+
+ tmp64 = div64_64(SCHED_LOAD_SCALE * exec_delta64, fair_delta64);
+ tmp64 = div64_64(tmp64 * exec_delta64, sample_interval64);
+
+ this_load = (unsigned long)tmp64;
+
+do_avg:
+
+ /* Update our load: */
+ for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+ unsigned long old_load, new_load;
+
+ /* scale is effectively 1 << i now, and >> i divides by scale */
+
+ old_load = this_rq->cpu_load[i];
+ new_load = this_load;
+
+ this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
+ }
}
+#ifdef CONFIG_SMP
+
/*
* double_rq_lock - safely lock two runqueues
*
@@ -2184,23 +2030,17 @@ void sched_exec(void)
* pull_task - move a task from a remote runqueue to the local runqueue.
* Both runqueues must be locked.
*/
-static void pull_task(struct rq *src_rq, struct prio_array *src_array,
- struct task_struct *p, struct rq *this_rq,
- struct prio_array *this_array, int this_cpu)
+static void pull_task(struct rq *src_rq, struct task_struct *p,
+ struct rq *this_rq, int this_cpu)
{
- dequeue_task(p, src_array);
- dec_nr_running(p, src_rq);
+ deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
- inc_nr_running(p, this_rq);
- enqueue_task(p, this_array);
- p->timestamp = (p->timestamp - src_rq->most_recent_timestamp)
- + this_rq->most_recent_timestamp;
+ activate_task(this_rq, p, 0);
/*
* Note that idle threads have a prio of MAX_PRIO, for this test
* to be always true for them.
*/
- if (TASK_PREEMPTS_CURR(p, this_rq))
- resched_task(this_rq->curr);
+ check_preempt_curr(this_rq, p);
}
/*
@@ -2208,7 +2048,7 @@ static void pull_task(struct rq *src_rq, struct prio_array *src_array,
*/
static
int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
- struct sched_domain *sd, enum idle_type idle,
+ struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned)
{
/*
@@ -2225,132 +2065,67 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
return 0;
/*
- * Aggressive migration if:
- * 1) task is cache cold, or
- * 2) too many balance attempts have failed.
+ * Aggressive migration if too many balance attempts have failed:
*/
-
- if (sd->nr_balance_failed > sd->cache_nice_tries) {
-#ifdef CONFIG_SCHEDSTATS
- if (task_hot(p, rq->most_recent_timestamp, sd))
- schedstat_inc(sd, lb_hot_gained[idle]);
-#endif
+ if (sd->nr_balance_failed > sd->cache_nice_tries)
return 1;
- }
- if (task_hot(p, rq->most_recent_timestamp, sd))
- return 0;
return 1;
}
-#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
-
-/*
- * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
- * load from busiest to this_rq, as part of a balancing operation within
- * "domain". Returns the number of tasks moved.
- *
- * Called with both runqueues locked.
- */
-static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
- struct sched_domain *sd, enum idle_type idle,
- int *all_pinned)
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, unsigned long *load_moved,
+ int this_best_prio, int best_prio, int best_prio_seen,
+ struct rq_iterator *iterator)
{
- int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
- best_prio_seen, skip_for_load;
- struct prio_array *array, *dst_array;
- struct list_head *head, *curr;
- struct task_struct *tmp;
- long rem_load_move;
+ int pulled = 0, pinned = 0, skip_for_load;
+ struct task_struct *p;
+ long rem_load_move = max_load_move;
if (max_nr_move == 0 || max_load_move == 0)
goto out;
- rem_load_move = max_load_move;
pinned = 1;
- this_best_prio = rq_best_prio(this_rq);
- best_prio = rq_best_prio(busiest);
- /*
- * Enable handling of the case where there is more than one task
- * with the best priority. If the current running task is one
- * of those with prio==best_prio we know it won't be moved
- * and therefore it's safe to override the skip (based on load) of
- * any task we find with that prio.
- */
- best_prio_seen = best_prio == busiest->curr->prio;
/*
- * We first consider expired tasks. Those will likely not be
- * executed in the near future, and they are most likely to
- * be cache-cold, thus switching CPUs has the least effect
- * on them.
+ * Start the load-balancing iterator:
*/
- if (busiest->expired->nr_active) {
- array = busiest->expired;
- dst_array = this_rq->expired;
- } else {
- array = busiest->active;
- dst_array = this_rq->active;
- }
-
-new_array:
- /* Start searching at priority 0: */
- idx = 0;
-skip_bitmap:
- if (!idx)
- idx = sched_find_first_bit(array->bitmap);
- else
- idx = find_next_bit(array->bitmap, MAX_PRIO, idx);
- if (idx >= MAX_PRIO) {
- if (array == busiest->expired && busiest->active->nr_active) {
- array = busiest->active;
- dst_array = this_rq->active;
- goto new_array;
- }
+ p = iterator->start(iterator->arg);
+next:
+ if (!p)
goto out;
- }
-
- head = array->queue + idx;
- curr = head->prev;
-skip_queue:
- tmp = list_entry(curr, struct task_struct, run_list);
-
- curr = curr->prev;
-
/*
* To help distribute high priority tasks accross CPUs we don't
* skip a task if it will be the highest priority task (i.e. smallest
* prio value) on its new queue regardless of its load weight
*/
- skip_for_load = tmp->load_weight > rem_load_move;
- if (skip_for_load && idx < this_best_prio)
- skip_for_load = !best_prio_seen && idx == best_prio;
+ skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
+ SCHED_LOAD_SCALE_FUZZ;
+ if (skip_for_load && p->prio < this_best_prio)
+ skip_for_load = !best_prio_seen && p->prio == best_prio;
if (skip_for_load ||
- !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
+ !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
- best_prio_seen |= idx == best_prio;
- if (curr != head)
- goto skip_queue;
- idx++;
- goto skip_bitmap;
+ best_prio_seen |= p->prio == best_prio;
+ p = iterator->next(iterator->arg);
+ goto next;
}
- pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
+ pull_task(busiest, p, this_rq, this_cpu);
pulled++;
- rem_load_move -= tmp->load_weight;
+ rem_load_move -= p->se.load.weight;
/*
* We only want to steal up to the prescribed number of tasks
* and the prescribed amount of weighted load.
*/
if (pulled < max_nr_move && rem_load_move > 0) {
- if (idx < this_best_prio)
- this_best_prio = idx;
- if (curr != head)
- goto skip_queue;
- idx++;
- goto skip_bitmap;
+ if (p->prio < this_best_prio)
+ this_best_prio = p->prio;
+ p = iterator->next(iterator->arg);
+ goto next;
}
out:
/*
@@ -2362,18 +2137,48 @@ out:
if (all_pinned)
*all_pinned = pinned;
+ *load_moved = max_load_move - rem_load_move;
return pulled;
}
/*
+ * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
+ * load from busiest to this_rq, as part of a balancing operation within
+ * "domain". Returns the number of tasks moved.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned)
+{
+ struct sched_class *class = sched_class_highest;
+ unsigned long load_moved, total_nr_moved = 0, nr_moved;
+ long rem_load_move = max_load_move;
+
+ do {
+ nr_moved = class->load_balance(this_rq, this_cpu, busiest,
+ max_nr_move, (unsigned long)rem_load_move,
+ sd, idle, all_pinned, &load_moved);
+ total_nr_moved += nr_moved;
+ max_nr_move -= nr_moved;
+ rem_load_move -= load_moved;
+ class = class->next;
+ } while (class && max_nr_move && rem_load_move > 0);
+
+ return total_nr_moved;
+}
+
+/*
* find_busiest_group finds and returns the busiest CPU group within the
* domain. It calculates and returns the amount of weighted load which
* should be moved to restore balance via the imbalance parameter.
*/
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
- unsigned long *imbalance, enum idle_type idle, int *sd_idle,
- cpumask_t *cpus, int *balance)
+ unsigned long *imbalance, enum cpu_idle_type idle,
+ int *sd_idle, cpumask_t *cpus, int *balance)
{
struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2391,9 +2196,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
max_load = this_load = total_load = total_pwr = 0;
busiest_load_per_task = busiest_nr_running = 0;
this_load_per_task = this_nr_running = 0;
- if (idle == NOT_IDLE)
+ if (idle == CPU_NOT_IDLE)
load_idx = sd->busy_idx;
- else if (idle == NEWLY_IDLE)
+ else if (idle == CPU_NEWLY_IDLE)
load_idx = sd->newidle_idx;
else
load_idx = sd->idle_idx;
@@ -2437,7 +2242,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
avg_load += load;
sum_nr_running += rq->nr_running;
- sum_weighted_load += rq->raw_weighted_load;
+ sum_weighted_load += weighted_cpuload(i);
}
/*
@@ -2477,8 +2282,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
* Busy processors will not participate in power savings
* balance.
*/
- if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
- goto group_next;
+ if (idle == CPU_NOT_IDLE ||
+ !(sd->flags & SD_POWERSAVINGS_BALANCE))
+ goto group_next;
/*
* If the local group is idle or completely loaded
@@ -2488,42 +2294,42 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
!this_nr_running))
power_savings_balance = 0;
- /*
+ /*
* If a group is already running at full capacity or idle,
* don't include that group in power savings calculations
- */
- if (!power_savings_balance || sum_nr_running >= group_capacity
+ */
+ if (!power_savings_balance || sum_nr_running >= group_capacity
|| !sum_nr_running)
- goto group_next;
+ goto group_next;
- /*
+ /*
* Calculate the group which has the least non-idle load.
- * This is the group from where we need to pick up the load
- * for saving power
- */
- if ((sum_nr_running < min_nr_running) ||
- (sum_nr_running == min_nr_running &&
+ * This is the group from where we need to pick up the load
+ * for saving power
+ */
+ if ((sum_nr_running < min_nr_running) ||
+ (sum_nr_running == min_nr_running &&
first_cpu(group->cpumask) <
first_cpu(group_min->cpumask))) {
- group_min = group;
- min_nr_running = sum_nr_running;
+ group_min = group;
+ min_nr_running = sum_nr_running;
min_load_per_task = sum_weighted_load /
sum_nr_running;
- }
+ }
- /*
+ /*
* Calculate the group which is almost near its
- * capacity but still has some space to pick up some load
- * from other group and save more power
- */
- if (sum_nr_running <= group_capacity - 1) {
- if (sum_nr_running > leader_nr_running ||
- (sum_nr_running == leader_nr_running &&
- first_cpu(group->cpumask) >
- first_cpu(group_leader->cpumask))) {
- group_leader = group;
- leader_nr_running = sum_nr_running;
- }
+ * capacity but still has some space to pick up some load
+ * from other group and save more power
+ */
+ if (sum_nr_running <= group_capacity - 1) {
+ if (sum_nr_running > leader_nr_running ||
+ (sum_nr_running == leader_nr_running &&
+ first_cpu(group->cpumask) >
+ first_cpu(group_leader->cpumask))) {
+ group_leader = group;
+ leader_nr_running = sum_nr_running;
+ }
}
group_next:
#endif
@@ -2578,7 +2384,7 @@ group_next:
* a think about bumping its value to force at least one task to be
* moved
*/
- if (*imbalance < busiest_load_per_task) {
+ if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
unsigned long tmp, pwr_now, pwr_move;
unsigned int imbn;
@@ -2592,7 +2398,8 @@ small_imbalance:
} else
this_load_per_task = SCHED_LOAD_SCALE;
- if (max_load - this_load >= busiest_load_per_task * imbn) {
+ if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >=
+ busiest_load_per_task * imbn) {
*imbalance = busiest_load_per_task;
return busiest;
}
@@ -2639,7 +2446,7 @@ small_imbalance:
out_balanced:
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
- if (idle == NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
+ if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
goto ret;
if (this == group_leader && group_leader != group_min) {
@@ -2656,7 +2463,7 @@ ret:
* find_busiest_queue - find the busiest runqueue among the cpus in group.
*/
static struct rq *
-find_busiest_queue(struct sched_group *group, enum idle_type idle,
+find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
unsigned long imbalance, cpumask_t *cpus)
{
struct rq *busiest = NULL, *rq;
@@ -2664,17 +2471,19 @@ find_busiest_queue(struct sched_group *group, enum idle_type idle,
int i;
for_each_cpu_mask(i, group->cpumask) {
+ unsigned long wl;
if (!cpu_isset(i, *cpus))
continue;
rq = cpu_rq(i);
+ wl = weighted_cpuload(i);
- if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
+ if (rq->nr_running == 1 && wl > imbalance)
continue;
- if (rq->raw_weighted_load > max_load) {
- max_load = rq->raw_weighted_load;
+ if (wl > max_load) {
+ max_load = wl;
busiest = rq;
}
}
@@ -2698,7 +2507,7 @@ static inline unsigned long minus_1_or_zero(unsigned long n)
* tasks if there is an imbalance.
*/
static int load_balance(int this_cpu, struct rq *this_rq,
- struct sched_domain *sd, enum idle_type idle,
+ struct sched_domain *sd, enum cpu_idle_type idle,
int *balance)
{
int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
@@ -2711,10 +2520,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
/*
* When power savings policy is enabled for the parent domain, idle
* sibling can pick up load irrespective of busy siblings. In this case,
- * let the state of idle sibling percolate up as IDLE, instead of
- * portraying it as NOT_IDLE.
+ * let the state of idle sibling percolate up as CPU_IDLE, instead of
+ * portraying it as CPU_NOT_IDLE.
*/
- if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
+ if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
sd_idle = 1;
@@ -2848,7 +2657,7 @@ out_one_pinned:
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
*
- * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
+ * Called from schedule when this_rq is about to become idle (CPU_NEWLY_IDLE).
* this_rq is locked.
*/
static int
@@ -2865,31 +2674,31 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
* When power savings policy is enabled for the parent domain, idle
* sibling can pick up load irrespective of busy siblings. In this case,
* let the state of idle sibling percolate up as IDLE, instead of
- * portraying it as NOT_IDLE.
+ * portraying it as CPU_NOT_IDLE.
*/
if (sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
sd_idle = 1;
- schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
+ schedstat_inc(sd, lb_cnt[CPU_NEWLY_IDLE]);
redo:
- group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE,
+ group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE,
&sd_idle, &cpus, NULL);
if (!group) {
- schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
+ schedstat_inc(sd, lb_nobusyg[CPU_NEWLY_IDLE]);
goto out_balanced;
}
- busiest = find_busiest_queue(group, NEWLY_IDLE, imbalance,
+ busiest = find_busiest_queue(group, CPU_NEWLY_IDLE, imbalance,
&cpus);
if (!busiest) {
- schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
+ schedstat_inc(sd, lb_nobusyq[CPU_NEWLY_IDLE]);
goto out_balanced;
}
BUG_ON(busiest == this_rq);
- schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance);
+ schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
nr_moved = 0;
if (busiest->nr_running > 1) {
@@ -2897,7 +2706,7 @@ redo:
double_lock_balance(this_rq, busiest);
nr_moved = move_tasks(this_rq, this_cpu, busiest,
minus_1_or_zero(busiest->nr_running),
- imbalance, sd, NEWLY_IDLE, NULL);
+ imbalance, sd, CPU_NEWLY_IDLE, NULL);
spin_unlock(&busiest->lock);
if (!nr_moved) {
@@ -2908,7 +2717,7 @@ redo:
}
if (!nr_moved) {
- schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
+ schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
return -1;
@@ -2918,7 +2727,7 @@ redo:
return nr_moved;
out_balanced:
- schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
+ schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
return -1;
@@ -2934,8 +2743,8 @@ out_balanced:
static void idle_balance(int this_cpu, struct rq *this_rq)
{
struct sched_domain *sd;
- int pulled_task = 0;
- unsigned long next_balance = jiffies + 60 * HZ;
+ int pulled_task = -1;
+ unsigned long next_balance = jiffies + HZ;
for_each_domain(this_cpu, sd) {
unsigned long interval;
@@ -2954,12 +2763,13 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
if (pulled_task)
break;
}
- if (!pulled_task)
+ if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
/*
* We are going idle. next_balance may be set based on
* a busy processor. So reset next_balance.
*/
this_rq->next_balance = next_balance;
+ }
}
/*
@@ -3003,7 +2813,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
schedstat_inc(sd, alb_cnt);
if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
- RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
+ RTPRIO_TO_LOAD_WEIGHT(100), sd, CPU_IDLE,
NULL))
schedstat_inc(sd, alb_pushed);
else
@@ -3012,32 +2822,6 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
spin_unlock(&target_rq->lock);
}
-static void update_load(struct rq *this_rq)
-{
- unsigned long this_load;
- unsigned int i, scale;
-
- this_load = this_rq->raw_weighted_load;
-
- /* Update our load: */
- for (i = 0, scale = 1; i < 3; i++, scale += scale) {
- unsigned long old_load, new_load;
-
- /* scale is effectively 1 << i now, and >> i divides by scale */
-
- old_load = this_rq->cpu_load[i];
- new_load = this_load;
- /*
- * Round up the averaging division if load is increasing. This
- * prevents us from getting stuck on 9 if the load is 10, for
- * example.
- */
- if (new_load > old_load)
- new_load += scale-1;
- this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
- }
-}
-
#ifdef CONFIG_NO_HZ
static struct {
atomic_t load_balancer;
@@ -3120,7 +2904,7 @@ static DEFINE_SPINLOCK(balancing);
*
* Balancing parameters are set up in arch_init_sched_domains.
*/
-static inline void rebalance_domains(int cpu, enum idle_type idle)
+static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
{
int balance = 1;
struct rq *rq = cpu_rq(cpu);
@@ -3134,13 +2918,16 @@ static inline void rebalance_domains(int cpu, enum idle_type idle)
continue;
interval = sd->balance_interval;
- if (idle != SCHED_IDLE)
+ if (idle != CPU_IDLE)
interval *= sd->busy_factor;
/* scale ms to jiffies */
interval = msecs_to_jiffies(interval);
if (unlikely(!interval))
interval = 1;
+ if (interval > HZ*NR_CPUS/10)
+ interval = HZ*NR_CPUS/10;
+
if (sd->flags & SD_SERIALIZE) {
if (!spin_trylock(&balancing))
@@ -3154,7 +2941,7 @@ static inline void rebalance_domains(int cpu, enum idle_type idle)
* longer idle, or one of our SMT siblings is
* not idle.
*/
- idle = NOT_IDLE;
+ idle = CPU_NOT_IDLE;
}
sd->last_balance = jiffies;
}
@@ -3182,11 +2969,12 @@ out:
*/
static void run_rebalance_domains(struct softirq_action *h)
{
- int local_cpu = smp_processor_id();
- struct rq *local_rq = cpu_rq(local_cpu);
- enum idle_type idle = local_rq->idle_at_tick ? SCHED_IDLE : NOT_IDLE;
+ int this_cpu = smp_processor_id();
+ struct rq *this_rq = cpu_rq(this_cpu);
+ enum cpu_idle_type idle = this_rq->idle_at_tick ?
+ CPU_IDLE : CPU_NOT_IDLE;
- rebalance_domains(local_cpu, idle);
+ rebalance_domains(this_cpu, idle);
#ifdef CONFIG_NO_HZ
/*
@@ -3194,13 +2982,13 @@ static void run_rebalance_domains(struct softirq_action *h)
* balancing on behalf of the other idle cpus whose ticks are
* stopped.
*/
- if (local_rq->idle_at_tick &&
- atomic_read(&nohz.load_balancer) == local_cpu) {
+ if (this_rq->idle_at_tick &&
+ atomic_read(&nohz.load_balancer) == this_cpu) {
cpumask_t cpus = nohz.cpu_mask;
struct rq *rq;
int balance_cpu;
- cpu_clear(local_cpu, cpus);
+ cpu_clear(this_cpu, cpus);
for_each_cpu_mask(balance_cpu, cpus) {
/*
* If this cpu gets work to do, stop the load balancing
@@ -3213,8 +3001,8 @@ static void run_rebalance_domains(struct softirq_action *h)
rebalance_domains(balance_cpu, SCHED_IDLE);
rq = cpu_rq(balance_cpu);
- if (time_after(local_rq->next_balance, rq->next_balance))
- local_rq->next_balance = rq->next_balance;
+ if (time_after(this_rq->next_balance, rq->next_balance))
+ this_rq->next_balance = rq->next_balance;
}
}
#endif
@@ -3227,9 +3015,8 @@ static void run_rebalance_domains(struct softirq_action *h)
* idle load balancing owner or decide to stop the periodic load balancing,
* if the whole system is idle.
*/
-static inline void trigger_load_balance(int cpu)
+static inline void trigger_load_balance(struct rq *rq, int cpu)
{
- struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_NO_HZ
/*
* If we were in the nohz mode recently and busy at the current
@@ -3281,13 +3068,29 @@ static inline void trigger_load_balance(int cpu)
if (time_after_eq(jiffies, rq->next_balance))
raise_softirq(SCHED_SOFTIRQ);
}
-#else
+
+#else /* CONFIG_SMP */
+
/*
* on UP we do not need to balance between CPUs:
*/
static inline void idle_balance(int cpu, struct rq *rq)
{
}
+
+/* Avoid "used but not defined" warning on UP */
+static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, unsigned long *load_moved,
+ int this_best_prio, int best_prio, int best_prio_seen,
+ struct rq_iterator *iterator)
+{
+ *load_moved = 0;
+
+ return 0;
+}
+
#endif
DEFINE_PER_CPU(struct kernel_stat, kstat);
@@ -3295,54 +3098,28 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
EXPORT_PER_CPU_SYMBOL(kstat);
/*
- * This is called on clock ticks and on context switches.
- * Bank in p->sched_time the ns elapsed since the last tick or switch.
- */
-static inline void
-update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
-{
- p->sched_time += now - p->last_ran;
- p->last_ran = rq->most_recent_timestamp = now;
-}
-
-/*
- * Return current->sched_time plus any more ns on the sched_clock
- * that have not yet been banked.
+ * Return p->sum_exec_runtime plus any more ns on the sched_clock
+ * that have not yet been banked in case the task is currently running.
*/
-unsigned long long current_sched_time(const struct task_struct *p)
+unsigned long long task_sched_runtime(struct task_struct *p)
{
- unsigned long long ns;
unsigned long flags;
+ u64 ns, delta_exec;
+ struct rq *rq;
- local_irq_save(flags);
- ns = p->sched_time + sched_clock() - p->last_ran;
- local_irq_restore(flags);
+ rq = task_rq_lock(p, &flags);
+ ns = p->se.sum_exec_runtime;
+ if (rq->curr == p) {
+ delta_exec = rq_clock(rq) - p->se.exec_start;
+ if ((s64)delta_exec > 0)
+ ns += delta_exec;
+ }
+ task_rq_unlock(rq, &flags);
return ns;
}
/*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired:
- */
-static inline int expired_starving(struct rq *rq)
-{
- if (rq->curr->static_prio > rq->best_expired_prio)
- return 1;
- if (!STARVATION_LIMIT || !rq->expired_timestamp)
- return 0;
- if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
- return 1;
- return 0;
-}
-
-/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3415,81 +3192,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
cpustat->steal = cputime64_add(cpustat->steal, tmp);
}
-static void task_running_tick(struct rq *rq, struct task_struct *p)
-{
- if (p->array != rq->active) {
- /* Task has expired but was not scheduled yet */
- set_tsk_need_resched(p);
- return;
- }
- spin_lock(&rq->lock);
- /*
- * The task was running during this tick - update the
- * time slice counter. Note: we do not update a thread's
- * priority until it either goes to sleep or uses up its
- * timeslice. This makes it possible for interactive tasks
- * to use up their timeslices at their highest priority levels.
- */
- if (rt_task(p)) {
- /*
- * RR tasks need a special form of timeslice management.
- * FIFO tasks have no timeslices.
- */
- if ((p->policy == SCHED_RR) && !--p->time_slice) {
- p->time_slice = task_timeslice(p);
- p->first_time_slice = 0;
- set_tsk_need_resched(p);
-
- /* put it at the end of the queue: */
- requeue_task(p, rq->active);
- }
- goto out_unlock;
- }
- if (!--p->time_slice) {
- dequeue_task(p, rq->active);
- set_tsk_need_resched(p);
- p->prio = effective_prio(p);
- p->time_slice = task_timeslice(p);
- p->first_time_slice = 0;
-
- if (!rq->expired_timestamp)
- rq->expired_timestamp = jiffies;
- if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
- enqueue_task(p, rq->expired);
- if (p->static_prio < rq->best_expired_prio)
- rq->best_expired_prio = p->static_prio;
- } else
- enqueue_task(p, rq->active);
- } else {
- /*
- * Prevent a too long timeslice allowing a task to monopolize
- * the CPU. We do this by splitting up the timeslice into
- * smaller pieces.
- *
- * Note: this does not mean the task's timeslices expire or
- * get lost in any way, they just might be preempted by
- * another task of equal priority. (one with higher
- * priority would have preempted this task already.) We
- * requeue this task to the end of the list on this priority
- * level, which is in essence a round-robin of tasks with
- * equal priority.
- *
- * This only applies to tasks in the interactive
- * delta range with at least TIMESLICE_GRANULARITY to requeue.
- */
- if (TASK_INTERACTIVE(p) && !((task_timeslice(p) -
- p->time_slice) % TIMESLICE_GRANULARITY(p)) &&
- (p->time_slice >= TIMESLICE_GRANULARITY(p)) &&
- (p->array == rq->active)) {
-
- requeue_task(p, rq->active);
- set_tsk_need_resched(p);
- }
- }
-out_unlock:
- spin_unlock(&rq->lock);
-}
-
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
@@ -3499,20 +3201,19 @@ out_unlock:
*/
void scheduler_tick(void)
{
- unsigned long long now = sched_clock();
- struct task_struct *p = current;
int cpu = smp_processor_id();
- int idle_at_tick = idle_cpu(cpu);
struct rq *rq = cpu_rq(cpu);
+ struct task_struct *curr = rq->curr;
- update_cpu_clock(p, rq, now);
+ spin_lock(&rq->lock);
+ if (curr != rq->idle) /* FIXME: needed? */
+ curr->sched_class->task_tick(rq, curr);
+ update_cpu_load(rq);
+ spin_unlock(&rq->lock);
- if (!idle_at_tick)
- task_running_tick(rq, p);
#ifdef CONFIG_SMP
- update_load(rq);
- rq->idle_at_tick = idle_at_tick;
- trigger_load_balance(cpu);
+ rq->idle_at_tick = idle_cpu(cpu);
+ trigger_load_balance(rq, cpu);
#endif
}
@@ -3554,170 +3255,129 @@ EXPORT_SYMBOL(sub_preempt_count);
#endif
-static inline int interactive_sleep(enum sleep_type sleep_type)
+/*
+ * Print scheduling while atomic bug:
+ */
+static noinline void __schedule_bug(struct task_struct *prev)
{
- return (sleep_type == SLEEP_INTERACTIVE ||
- sleep_type == SLEEP_INTERRUPTED);
+ printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n",
+ prev->comm, preempt_count(), prev->pid);
+ debug_show_held_locks(prev);
+ if (irqs_disabled())
+ print_irqtrace_events(prev);
+ dump_stack();
}
/*
- * schedule() is the main scheduler function.
+ * Various schedule()-time debugging checks and statistics:
*/
-asmlinkage void __sched schedule(void)
+static inline void schedule_debug(struct task_struct *prev)
{
- struct task_struct *prev, *next;
- struct prio_array *array;
- struct list_head *queue;
- unsigned long long now;
- unsigned long run_time;
- int cpu, idx, new_prio;
- long *switch_count;
- struct rq *rq;
-
/*
* Test if we are atomic. Since do_exit() needs to call into
* schedule() atomically, we ignore that path for now.
* Otherwise, whine if we are scheduling when we should not be.
*/
- if (unlikely(in_atomic() && !current->exit_state)) {
- printk(KERN_ERR "BUG: scheduling while atomic: "
- "%s/0x%08x/%d\n",
- current->comm, preempt_count(), current->pid);
- debug_show_held_locks(current);
- if (irqs_disabled())
- print_irqtrace_events(current);
- dump_stack();
- }
- profile_hit(SCHED_PROFILING, __builtin_return_address(0));
+ if (unlikely(in_atomic_preempt_off()) && unlikely(!prev->exit_state))
+ __schedule_bug(prev);
-need_resched:
- preempt_disable();
- prev = current;
- release_kernel_lock(prev);
-need_resched_nonpreemptible:
- rq = this_rq();
+ profile_hit(SCHED_PROFILING, __builtin_return_address(0));
- /*
- * The idle thread is not allowed to schedule!
- * Remove this check after it has been exercised a bit.
- */
- if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) {
- printk(KERN_ERR "bad: scheduling from the idle thread!\n");
- dump_stack();
- }
+ schedstat_inc(this_rq(), sched_cnt);
+}
- schedstat_inc(rq, sched_cnt);
- now = sched_clock();
- if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
- run_time = now - prev->timestamp;
- if (unlikely((long long)(now - prev->timestamp) < 0))
- run_time = 0;
- } else
- run_time = NS_MAX_SLEEP_AVG;
+/*
+ * Pick up the highest-prio task:
+ */
+static inline struct task_struct *
+pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
+{
+ struct sched_class *class;
+ struct task_struct *p;
/*
- * Tasks charged proportionately less run_time at high sleep_avg to
- * delay them losing their interactive status
+ * Optimization: we know that if all tasks are in
+ * the fair class we can call that function directly:
*/
- run_time /= (CURRENT_BONUS(prev) ? : 1);
-
- spin_lock_irq(&rq->lock);
-
- switch_count = &prev->nivcsw;
- if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
- switch_count = &prev->nvcsw;
- if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
- unlikely(signal_pending(prev))))
- prev->state = TASK_RUNNING;
- else {
- if (prev->state == TASK_UNINTERRUPTIBLE)
- rq->nr_uninterruptible++;
- deactivate_task(prev, rq);
- }
- }
-
- cpu = smp_processor_id();
- if (unlikely(!rq->nr_running)) {
- idle_balance(cpu, rq);
- if (!rq->nr_running) {
- next = rq->idle;
- rq->expired_timestamp = 0;
- goto switch_tasks;
- }
+ if (likely(rq->nr_running == rq->cfs.nr_running)) {
+ p = fair_sched_class.pick_next_task(rq, now);
+ if (likely(p))
+ return p;
}
- array = rq->active;
- if (unlikely(!array->nr_active)) {
+ class = sched_class_highest;
+ for ( ; ; ) {
+ p = class->pick_next_task(rq, now);
+ if (p)
+ return p;
/*
- * Switch the active and expired arrays.
+ * Will never be NULL as the idle class always
+ * returns a non-NULL p:
*/
- schedstat_inc(rq, sched_switch);
- rq->active = rq->expired;
- rq->expired = array;
- array = rq->active;
- rq->expired_timestamp = 0;
- rq->best_expired_prio = MAX_PRIO;
+ class = class->next;
}
+}
+
+/*
+ * schedule() is the main scheduler function.
+ */
+asmlinkage void __sched schedule(void)
+{
+ struct task_struct *prev, *next;
+ long *switch_count;
+ struct rq *rq;
+ u64 now;
+ int cpu;
- idx = sched_find_first_bit(array->bitmap);
- queue = array->queue + idx;
- next = list_entry(queue->next, struct task_struct, run_list);
+need_resched:
+ preempt_disable();
+ cpu = smp_processor_id();
+ rq = cpu_rq(cpu);
+ rcu_qsctr_inc(cpu);
+ prev = rq->curr;
+ switch_count = &prev->nivcsw;
- if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
- unsigned long long delta = now - next->timestamp;
- if (unlikely((long long)(now - next->timestamp) < 0))
- delta = 0;
+ release_kernel_lock(prev);
+need_resched_nonpreemptible:
- if (next->sleep_type == SLEEP_INTERACTIVE)
- delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
+ schedule_debug(prev);
- array = next->array;
- new_prio = recalc_task_prio(next, next->timestamp + delta);
+ spin_lock_irq(&rq->lock);
+ clear_tsk_need_resched(prev);
- if (unlikely(next->prio != new_prio)) {
- dequeue_task(next, array);
- next->prio = new_prio;
- enqueue_task(next, array);
+ if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
+ if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
+ unlikely(signal_pending(prev)))) {
+ prev->state = TASK_RUNNING;
+ } else {
+ deactivate_task(rq, prev, 1);
}
+ switch_count = &prev->nvcsw;
}
- next->sleep_type = SLEEP_NORMAL;
-switch_tasks:
- if (next == rq->idle)
- schedstat_inc(rq, sched_goidle);
- prefetch(next);
- prefetch_stack(next);
- clear_tsk_need_resched(prev);
- rcu_qsctr_inc(task_cpu(prev));
- update_cpu_clock(prev, rq, now);
+ if (unlikely(!rq->nr_running))
+ idle_balance(cpu, rq);
- prev->sleep_avg -= run_time;
- if ((long)prev->sleep_avg <= 0)
- prev->sleep_avg = 0;
- prev->timestamp = prev->last_ran = now;
+ now = __rq_clock(rq);
+ prev->sched_class->put_prev_task(rq, prev, now);
+ next = pick_next_task(rq, prev, now);
sched_info_switch(prev, next);
+
if (likely(prev != next)) {
- next->timestamp = next->last_ran = now;
rq->nr_switches++;
rq->curr = next;
++*switch_count;
- prepare_task_switch(rq, next);
- prev = context_switch(rq, prev, next);
- barrier();
- /*
- * this_rq must be evaluated again because prev may have moved
- * CPUs since it called schedule(), thus the 'rq' on its stack
- * frame will be invalid.
- */
- finish_task_switch(this_rq(), prev);
+ context_switch(rq, prev, next); /* unlocks the rq */
} else
spin_unlock_irq(&rq->lock);
- prev = current;
- if (unlikely(reacquire_kernel_lock(prev) < 0))
+ if (unlikely(reacquire_kernel_lock(current) < 0)) {
+ cpu = smp_processor_id();
+ rq = cpu_rq(cpu);
goto need_resched_nonpreemptible;
+ }
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
@@ -4045,74 +3705,85 @@ out:
}
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
-
-#define SLEEP_ON_VAR \
- unsigned long flags; \
- wait_queue_t wait; \
- init_waitqueue_entry(&wait, current);
-
-#define SLEEP_ON_HEAD \
- spin_lock_irqsave(&q->lock,flags); \
- __add_wait_queue(q, &wait); \
+static inline void
+sleep_on_head(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags)
+{
+ spin_lock_irqsave(&q->lock, *flags);
+ __add_wait_queue(q, wait);
spin_unlock(&q->lock);
+}
-#define SLEEP_ON_TAIL \
- spin_lock_irq(&q->lock); \
- __remove_wait_queue(q, &wait); \
- spin_unlock_irqrestore(&q->lock, flags);
+static inline void
+sleep_on_tail(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags)
+{
+ spin_lock_irq(&q->lock);
+ __remove_wait_queue(q, wait);
+ spin_unlock_irqrestore(&q->lock, *flags);
+}
-void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
+void __sched interruptible_sleep_on(wait_queue_head_t *q)
{
- SLEEP_ON_VAR
+ unsigned long flags;
+ wait_queue_t wait;
+
+ init_waitqueue_entry(&wait, current);
current->state = TASK_INTERRUPTIBLE;
- SLEEP_ON_HEAD
+ sleep_on_head(q, &wait, &flags);
schedule();
- SLEEP_ON_TAIL
+ sleep_on_tail(q, &wait, &flags);
}
EXPORT_SYMBOL(interruptible_sleep_on);
-long fastcall __sched
+long __sched
interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
- SLEEP_ON_VAR
+ unsigned long flags;
+ wait_queue_t wait;
+
+ init_waitqueue_entry(&wait, current);
current->state = TASK_INTERRUPTIBLE;
- SLEEP_ON_HEAD
+ sleep_on_head(q, &wait, &flags);
timeout = schedule_timeout(timeout);
- SLEEP_ON_TAIL
+ sleep_on_tail(q, &wait, &flags);
return timeout;
}
EXPORT_SYMBOL(interruptible_sleep_on_timeout);
-void fastcall __sched sleep_on(wait_queue_head_t *q)
+void __sched sleep_on(wait_queue_head_t *q)
{
- SLEEP_ON_VAR
+ unsigned long flags;
+ wait_queue_t wait;
+
+ init_waitqueue_entry(&wait, current);
current->state = TASK_UNINTERRUPTIBLE;
- SLEEP_ON_HEAD
+ sleep_on_head(q, &wait, &flags);
schedule();
- SLEEP_ON_TAIL
+ sleep_on_tail(q, &wait, &flags);
}
EXPORT_SYMBOL(sleep_on);
-long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
+long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
- SLEEP_ON_VAR
+ unsigned long flags;
+ wait_queue_t wait;
+
+ init_waitqueue_entry(&wait, current);
current->state = TASK_UNINTERRUPTIBLE;
- SLEEP_ON_HEAD
+ sleep_on_head(q, &wait, &flags);
timeout = schedule_timeout(timeout);
- SLEEP_ON_TAIL
+ sleep_on_tail(q, &wait, &flags);
return timeout;
}
-
EXPORT_SYMBOL(sleep_on_timeout);
#ifdef CONFIG_RT_MUTEXES
@@ -4129,29 +3800,30 @@ EXPORT_SYMBOL(sleep_on_timeout);
*/
void rt_mutex_setprio(struct task_struct *p, int prio)
{
- struct prio_array *array;
unsigned long flags;
+ int oldprio, on_rq;
struct rq *rq;
- int oldprio;
+ u64 now;
BUG_ON(prio < 0 || prio > MAX_PRIO);
rq = task_rq_lock(p, &flags);
+ now = rq_clock(rq);
oldprio = p->prio;
- array = p->array;
- if (array)
- dequeue_task(p, array);
+ on_rq = p->se.on_rq;
+ if (on_rq)
+ dequeue_task(rq, p, 0, now);
+
+ if (rt_prio(prio))
+ p->sched_class = &rt_sched_class;
+ else
+ p->sched_class = &fair_sched_class;
+
p->prio = prio;
- if (array) {
- /*
- * If changing to an RT priority then queue it
- * in the active array!
- */
- if (rt_task(p))
- array = rq->active;
- enqueue_task(p, array);
+ if (on_rq) {
+ enqueue_task(rq, p, 0, now);
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
@@ -4160,8 +3832,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (task_running(rq, p)) {
if (p->prio > oldprio)
resched_task(rq->curr);
- } else if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
+ } else {
+ check_preempt_curr(rq, p);
+ }
}
task_rq_unlock(rq, &flags);
}
@@ -4170,10 +3843,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
void set_user_nice(struct task_struct *p, long nice)
{
- struct prio_array *array;
- int old_prio, delta;
+ int old_prio, delta, on_rq;
unsigned long flags;
struct rq *rq;
+ u64 now;
if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
return;
@@ -4182,20 +3855,21 @@ void set_user_nice(struct task_struct *p, long nice)
* the task might be in the middle of scheduling on another CPU.
*/
rq = task_rq_lock(p, &flags);
+ now = rq_clock(rq);
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
- * not SCHED_NORMAL/SCHED_BATCH:
+ * SCHED_FIFO/SCHED_RR:
*/
- if (has_rt_policy(p)) {
+ if (task_has_rt_policy(p)) {
p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock;
}
- array = p->array;
- if (array) {
- dequeue_task(p, array);
- dec_raw_weighted_load(rq, p);
+ on_rq = p->se.on_rq;
+ if (on_rq) {
+ dequeue_task(rq, p, 0, now);
+ dec_load(rq, p, now);
}
p->static_prio = NICE_TO_PRIO(nice);
@@ -4204,9 +3878,9 @@ void set_user_nice(struct task_struct *p, long nice)
p->prio = effective_prio(p);
delta = p->prio - old_prio;
- if (array) {
- enqueue_task(p, array);
- inc_raw_weighted_load(rq, p);
+ if (on_rq) {
+ enqueue_task(rq, p, 0, now);
+ inc_load(rq, p, now);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
@@ -4326,20 +4000,28 @@ static inline struct task_struct *find_process_by_pid(pid_t pid)
}
/* Actually do priority change: must hold rq lock. */
-static void __setscheduler(struct task_struct *p, int policy, int prio)
+static void
+__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
{
- BUG_ON(p->array);
+ BUG_ON(p->se.on_rq);
p->policy = policy;
+ switch (p->policy) {
+ case SCHED_NORMAL:
+ case SCHED_BATCH:
+ case SCHED_IDLE:
+ p->sched_class = &fair_sched_class;
+ break;
+ case SCHED_FIFO:
+ case SCHED_RR:
+ p->sched_class = &rt_sched_class;
+ break;
+ }
+
p->rt_priority = prio;
p->normal_prio = normal_prio(p);
/* we are holding p->pi_lock already */
p->prio = rt_mutex_getprio(p);
- /*
- * SCHED_BATCH tasks are treated as perpetual CPU hogs:
- */
- if (policy == SCHED_BATCH)
- p->sleep_avg = 0;
set_load_weight(p);
}
@@ -4354,8 +4036,7 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
int sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param)
{
- int retval, oldprio, oldpolicy = -1;
- struct prio_array *array;
+ int retval, oldprio, oldpolicy = -1, on_rq;
unsigned long flags;
struct rq *rq;
@@ -4366,27 +4047,27 @@ recheck:
if (policy < 0)
policy = oldpolicy = p->policy;
else if (policy != SCHED_FIFO && policy != SCHED_RR &&
- policy != SCHED_NORMAL && policy != SCHED_BATCH)
+ policy != SCHED_NORMAL && policy != SCHED_BATCH &&
+ policy != SCHED_IDLE)
return -EINVAL;
/*
* Valid priorities for SCHED_FIFO and SCHED_RR are
- * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and
- * SCHED_BATCH is 0.
+ * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
+ * SCHED_BATCH and SCHED_IDLE is 0.
*/
if (param->sched_priority < 0 ||
(p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
(!p->mm && param->sched_priority > MAX_RT_PRIO-1))
return -EINVAL;
- if (is_rt_policy(policy) != (param->sched_priority != 0))
+ if (rt_policy(policy) != (param->sched_priority != 0))
return -EINVAL;
/*
* Allow unprivileged RT tasks to decrease priority:
*/
if (!capable(CAP_SYS_NICE)) {
- if (is_rt_policy(policy)) {
+ if (rt_policy(policy)) {
unsigned long rlim_rtprio;
- unsigned long flags;
if (!lock_task_sighand(p, &flags))
return -ESRCH;
@@ -4402,6 +4083,12 @@ recheck:
param->sched_priority > rlim_rtprio)
return -EPERM;
}
+ /*
+ * Like positive nice levels, dont allow tasks to
+ * move out of SCHED_IDLE either:
+ */
+ if (p->policy == SCHED_IDLE && policy != SCHED_IDLE)
+ return -EPERM;
/* can't change other user's priorities */
if ((current->euid != p->euid) &&
@@ -4429,13 +4116,13 @@ recheck:
spin_unlock_irqrestore(&p->pi_lock, flags);
goto recheck;
}
- array = p->array;
- if (array)
- deactivate_task(p, rq);
+ on_rq = p->se.on_rq;
+ if (on_rq)
+ deactivate_task(rq, p, 0);
oldprio = p->prio;
- __setscheduler(p, policy, param->sched_priority);
- if (array) {
- __activate_task(p, rq);
+ __setscheduler(rq, p, policy, param->sched_priority);
+ if (on_rq) {
+ activate_task(rq, p, 0);
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
@@ -4444,8 +4131,9 @@ recheck:
if (task_running(rq, p)) {
if (p->prio > oldprio)
resched_task(rq->curr);
- } else if (TASK_PREEMPTS_CURR(p, rq))
- resched_task(rq->curr);
+ } else {
+ check_preempt_curr(rq, p);
+ }
}
__task_rq_unlock(rq);
spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -4717,41 +4405,18 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
/**
* sys_sched_yield - yield the current processor to other threads.
*
- * This function yields the current CPU by moving the calling thread
- * to the expired array. If there are no other threads running on this
- * CPU then this function will return.
+ * This function yields the current CPU to other tasks. If there are no
+ * other threads running on this CPU then this function will return.
*/
asmlinkage long sys_sched_yield(void)
{
struct rq *rq = this_rq_lock();
- struct prio_array *array = current->array, *target = rq->expired;
schedstat_inc(rq, yld_cnt);
- /*
- * We implement yielding by moving the task into the expired
- * queue.
- *
- * (special rule: RT tasks will just roundrobin in the active
- * array.)
- */
- if (rt_task(current))
- target = rq->active;
-
- if (array->nr_active == 1) {
+ if (unlikely(rq->nr_running == 1))
schedstat_inc(rq, yld_act_empty);
- if (!rq->expired->nr_active)
- schedstat_inc(rq, yld_both_empty);
- } else if (!rq->expired->nr_active)
- schedstat_inc(rq, yld_exp_empty);
-
- if (array != target) {
- dequeue_task(current, array);
- enqueue_task(current, target);
- } else
- /*
- * requeue_task is cheaper so perform that if possible.
- */
- requeue_task(current, array);
+ else
+ current->sched_class->yield_task(rq, current);
/*
* Since we are going to call schedule() anyway, there's
@@ -4902,6 +4567,7 @@ asmlinkage long sys_sched_get_priority_max(int policy)
break;
case SCHED_NORMAL:
case SCHED_BATCH:
+ case SCHED_IDLE:
ret = 0;
break;
}
@@ -4926,6 +4592,7 @@ asmlinkage long sys_sched_get_priority_min(int policy)
break;
case SCHED_NORMAL:
case SCHED_BATCH:
+ case SCHED_IDLE:
ret = 0;
}
return ret;
@@ -4960,7 +4627,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
goto out_unlock;
jiffies_to_timespec(p->policy == SCHED_FIFO ?
- 0 : task_timeslice(p), &t);
+ 0 : static_prio_timeslice(p->static_prio), &t);
read_unlock(&tasklist_lock);
retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
out_nounlock:
@@ -5035,6 +4702,9 @@ void show_state_filter(unsigned long state_filter)
touch_all_softlockup_watchdogs();
+#ifdef CONFIG_SCHED_DEBUG
+ sysrq_sched_debug_show();
+#endif
read_unlock(&tasklist_lock);
/*
* Only show locks if all tasks are dumped:
@@ -5043,6 +4713,11 @@ void show_state_filter(unsigned long state_filter)
debug_show_all_locks();
}
+void __cpuinit init_idle_bootup_task(struct task_struct *idle)
+{
+ idle->sched_class = &idle_sched_class;
+}
+
/**
* init_idle - set up an idle thread for a given CPU
* @idle: task in question
@@ -5056,13 +4731,12 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
- idle->timestamp = sched_clock();
- idle->sleep_avg = 0;
- idle->array = NULL;
+ __sched_fork(idle);
+ idle->se.exec_start = sched_clock();
+
idle->prio = idle->normal_prio = MAX_PRIO;
- idle->state = TASK_RUNNING;
idle->cpus_allowed = cpumask_of_cpu(cpu);
- set_task_cpu(idle, cpu);
+ __set_task_cpu(idle, cpu);
spin_lock_irqsave(&rq->lock, flags);
rq->curr = rq->idle = idle;
@@ -5077,6 +4751,10 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
#else
task_thread_info(idle)->preempt_count = 0;
#endif
+ /*
+ * The idle tasks have their own, simple scheduling class:
+ */
+ idle->sched_class = &idle_sched_class;
}
/*
@@ -5088,6 +4766,28 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
*/
cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+/*
+ * Increase the granularity value when there are more CPUs,
+ * because with more CPUs the 'effective latency' as visible
+ * to users decreases. But the relationship is not linear,
+ * so pick a second-best guess by going with the log2 of the
+ * number of CPUs.
+ *
+ * This idea comes from the SD scheduler of Con Kolivas:
+ */
+static inline void sched_init_granularity(void)
+{
+ unsigned int factor = 1 + ilog2(num_online_cpus());
+ const unsigned long gran_limit = 10000000;
+
+ sysctl_sched_granularity *= factor;
+ if (sysctl_sched_granularity > gran_limit)
+ sysctl_sched_granularity = gran_limit;
+
+ sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
+ sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+}
+
#ifdef CONFIG_SMP
/*
* This is how migration works:
@@ -5161,7 +4861,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
{
struct rq *rq_dest, *rq_src;
- int ret = 0;
+ int ret = 0, on_rq;
if (unlikely(cpu_is_offline(dest_cpu)))
return ret;
@@ -5177,20 +4877,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
if (!cpu_isset(dest_cpu, p->cpus_allowed))
goto out;
+ on_rq = p->se.on_rq;
+ if (on_rq)
+ deactivate_task(rq_src, p, 0);
set_task_cpu(p, dest_cpu);
- if (p->array) {
- /*
- * Sync timestamp with rq_dest's before activating.
- * The same thing could be achieved by doing this step
- * afterwards, and pretending it was a local activate.
- * This way is cleaner and logically correct.
- */
- p->timestamp = p->timestamp - rq_src->most_recent_timestamp
- + rq_dest->most_recent_timestamp;
- deactivate_task(p, rq_src);
- __activate_task(p, rq_dest);
- if (TASK_PREEMPTS_CURR(p, rq_dest))
- resched_task(rq_dest->curr);
+ if (on_rq) {
+ activate_task(rq_dest, p, 0);
+ check_preempt_curr(rq_dest, p);
}
ret = 1;
out:
@@ -5342,7 +5035,8 @@ static void migrate_live_tasks(int src_cpu)
write_unlock_irq(&tasklist_lock);
}
-/* Schedules idle task to be the next runnable task on current CPU.
+/*
+ * Schedules idle task to be the next runnable task on current CPU.
* It does so by boosting its priority to highest possible and adding it to
* the _front_ of the runqueue. Used by CPU offline code.
*/
@@ -5362,10 +5056,10 @@ void sched_idle_next(void)
*/
spin_lock_irqsave(&rq->lock, flags);
- __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
+ __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
/* Add idle task to the _front_ of its priority queue: */
- __activate_idle_task(p, rq);
+ activate_idle_task(p, rq);
spin_unlock_irqrestore(&rq->lock, flags);
}
@@ -5415,16 +5109,15 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
static void migrate_dead_tasks(unsigned int dead_cpu)
{
struct rq *rq = cpu_rq(dead_cpu);
- unsigned int arr, i;
+ struct task_struct *next;
- for (arr = 0; arr < 2; arr++) {
- for (i = 0; i < MAX_PRIO; i++) {
- struct list_head *list = &rq->arrays[arr].queue[i];
-
- while (!list_empty(list))
- migrate_dead(dead_cpu, list_entry(list->next,
- struct task_struct, run_list));
- }
+ for ( ; ; ) {
+ if (!rq->nr_running)
+ break;
+ next = pick_next_task(rq, rq->curr, rq_clock(rq));
+ if (!next)
+ break;
+ migrate_dead(dead_cpu, next);
}
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -5448,14 +5141,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+ p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
if (IS_ERR(p))
return NOTIFY_BAD;
p->flags |= PF_NOFREEZE;
kthread_bind(p, cpu);
/* Must be high prio: stop_machine expects to yield to it. */
rq = task_rq_lock(p, &flags);
- __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
+ __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
task_rq_unlock(rq, &flags);
cpu_rq(cpu)->migration_thread = p;
break;
@@ -5486,9 +5179,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
rq->migration_thread = NULL;
/* Idle task back to normal (off runqueue, low prio) */
rq = task_rq_lock(rq->idle, &flags);
- deactivate_task(rq->idle, rq);
+ deactivate_task(rq, rq->idle, 0);
rq->idle->static_prio = MAX_PRIO;
- __setscheduler(rq->idle, SCHED_NORMAL, 0);
+ __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
+ rq->idle->sched_class = &idle_sched_class;
migrate_dead_tasks(cpu);
task_rq_unlock(rq, &flags);
migrate_nr_uninterruptible(rq);
@@ -5797,483 +5491,6 @@ init_sched_build_groups(cpumask_t span, const cpumask_t *cpu_map,
#define SD_NODES_PER_DOMAIN 16
-/*
- * Self-tuning task migration cost measurement between source and target CPUs.
- *
- * This is done by measuring the cost of manipulating buffers of varying
- * sizes. For a given buffer-size here are the steps that are taken:
- *
- * 1) the source CPU reads+dirties a shared buffer
- * 2) the target CPU reads+dirties the same shared buffer
- *
- * We measure how long they take, in the following 4 scenarios:
- *
- * - source: CPU1, target: CPU2 | cost1
- * - source: CPU2, target: CPU1 | cost2
- * - source: CPU1, target: CPU1 | cost3
- * - source: CPU2, target: CPU2 | cost4
- *
- * We then calculate the cost3+cost4-cost1-cost2 difference - this is
- * the cost of migration.
- *
- * We then start off from a small buffer-size and iterate up to larger
- * buffer sizes, in 5% steps - measuring each buffer-size separately, and
- * doing a maximum search for the cost. (The maximum cost for a migration
- * normally occurs when the working set size is around the effective cache
- * size.)
- */
-#define SEARCH_SCOPE 2
-#define MIN_CACHE_SIZE (64*1024U)
-#define DEFAULT_CACHE_SIZE (5*1024*1024U)
-#define ITERATIONS 1
-#define SIZE_THRESH 130
-#define COST_THRESH 130
-
-/*
- * The migration cost is a function of 'domain distance'. Domain
- * distance is the number of steps a CPU has to iterate down its
- * domain tree to share a domain with the other CPU. The farther
- * two CPUs are from each other, the larger the distance gets.
- *
- * Note that we use the distance only to cache measurement results,
- * the distance value is not used numerically otherwise. When two
- * CPUs have the same distance it is assumed that the migration
- * cost is the same. (this is a simplification but quite practical)
- */
-#define MAX_DOMAIN_DISTANCE 32
-
-static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
- { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
-/*
- * Architectures may override the migration cost and thus avoid
- * boot-time calibration. Unit is nanoseconds. Mostly useful for
- * virtualized hardware:
- */
-#ifdef CONFIG_DEFAULT_MIGRATION_COST
- CONFIG_DEFAULT_MIGRATION_COST
-#else
- -1LL
-#endif
-};
-
-/*
- * Allow override of migration cost - in units of microseconds.
- * E.g. migration_cost=1000,2000,3000 will set up a level-1 cost
- * of 1 msec, level-2 cost of 2 msecs and level3 cost of 3 msecs:
- */
-static int __init migration_cost_setup(char *str)
-{
- int ints[MAX_DOMAIN_DISTANCE+1], i;
-
- str = get_options(str, ARRAY_SIZE(ints), ints);
-
- printk("#ints: %d\n", ints[0]);
- for (i = 1; i <= ints[0]; i++) {
- migration_cost[i-1] = (unsigned long long)ints[i]*1000;
- printk("migration_cost[%d]: %Ld\n", i-1, migration_cost[i-1]);
- }
- return 1;
-}
-
-__setup ("migration_cost=", migration_cost_setup);
-
-/*
- * Global multiplier (divisor) for migration-cutoff values,
- * in percentiles. E.g. use a value of 150 to get 1.5 times
- * longer cache-hot cutoff times.
- *
- * (We scale it from 100 to 128 to long long handling easier.)
- */
-
-#define MIGRATION_FACTOR_SCALE 128
-
-static unsigned int migration_factor = MIGRATION_FACTOR_SCALE;
-
-static int __init setup_migration_factor(char *str)
-{
- get_option(&str, &migration_factor);
- migration_factor = migration_factor * MIGRATION_FACTOR_SCALE / 100;
- return 1;
-}
-
-__setup("migration_factor=", setup_migration_factor);
-
-/*
- * Estimated distance of two CPUs, measured via the number of domains
- * we have to pass for the two CPUs to be in the same span:
- */
-static unsigned long domain_distance(int cpu1, int cpu2)
-{
- unsigned long distance = 0;
- struct sched_domain *sd;
-
- for_each_domain(cpu1, sd) {
- WARN_ON(!cpu_isset(cpu1, sd->span));
- if (cpu_isset(cpu2, sd->span))
- return distance;
- distance++;
- }
- if (distance >= MAX_DOMAIN_DISTANCE) {
- WARN_ON(1);
- distance = MAX_DOMAIN_DISTANCE-1;
- }
-
- return distance;
-}
-
-static unsigned int migration_debug;
-
-static int __init setup_migration_debug(char *str)
-{
- get_option(&str, &migration_debug);
- return 1;
-}
-
-__setup("migration_debug=", setup_migration_debug);
-
-/*
- * Maximum cache-size that the scheduler should try to measure.
- * Architectures with larger caches should tune this up during
- * bootup. Gets used in the domain-setup code (i.e. during SMP
- * bootup).
- */
-unsigned int max_cache_size;
-
-static int __init setup_max_cache_size(char *str)
-{
- get_option(&str, &max_cache_size);
- return 1;
-}
-
-__setup("max_cache_size=", setup_max_cache_size);
-
-/*
- * Dirty a big buffer in a hard-to-predict (for the L2 cache) way. This
- * is the operation that is timed, so we try to generate unpredictable
- * cachemisses that still end up filling the L2 cache:
- */
-static void touch_cache(void *__cache, unsigned long __size)
-{
- unsigned long size = __size / sizeof(long);
- unsigned long chunk1 = size / 3;
- unsigned long chunk2 = 2 * size / 3;
- unsigned long *cache = __cache;
- int i;
-
- for (i = 0; i < size/6; i += 8) {
- switch (i % 6) {
- case 0: cache[i]++;
- case 1: cache[size-1-i]++;
- case 2: cache[chunk1-i]++;
- case 3: cache[chunk1+i]++;
- case 4: cache[chunk2-i]++;
- case 5: cache[chunk2+i]++;
- }
- }
-}
-
-/*
- * Measure the cache-cost of one task migration. Returns in units of nsec.
- */
-static unsigned long long
-measure_one(void *cache, unsigned long size, int source, int target)
-{
- cpumask_t mask, saved_mask;
- unsigned long long t0, t1, t2, t3, cost;
-
- saved_mask = current->cpus_allowed;
-
- /*
- * Flush source caches to RAM and invalidate them:
- */
- sched_cacheflush();
-
- /*
- * Migrate to the source CPU:
- */
- mask = cpumask_of_cpu(source);
- set_cpus_allowed(current, mask);
- WARN_ON(smp_processor_id() != source);
-
- /*
- * Dirty the working set:
- */
- t0 = sched_clock();
- touch_cache(cache, size);
- t1 = sched_clock();
-
- /*
- * Migrate to the target CPU, dirty the L2 cache and access
- * the shared buffer. (which represents the working set
- * of a migrated task.)
- */
- mask = cpumask_of_cpu(target);
- set_cpus_allowed(current, mask);
- WARN_ON(smp_processor_id() != target);
-
- t2 = sched_clock();
- touch_cache(cache, size);
- t3 = sched_clock();
-
- cost = t1-t0 + t3-t2;
-
- if (migration_debug >= 2)
- printk("[%d->%d]: %8Ld %8Ld %8Ld => %10Ld.\n",
- source, target, t1-t0, t1-t0, t3-t2, cost);
- /*
- * Flush target caches to RAM and invalidate them:
- */
- sched_cacheflush();
-
- set_cpus_allowed(current, saved_mask);
-
- return cost;
-}
-
-/*
- * Measure a series of task migrations and return the average
- * result. Since this code runs early during bootup the system
- * is 'undisturbed' and the average latency makes sense.
- *
- * The algorithm in essence auto-detects the relevant cache-size,
- * so it will properly detect different cachesizes for different
- * cache-hierarchies, depending on how the CPUs are connected.
- *
- * Architectures can prime the upper limit of the search range via
- * max_cache_size, otherwise the search range defaults to 20MB...64K.
- */
-static unsigned long long
-measure_cost(int cpu1, int cpu2, void *cache, unsigned int size)
-{
- unsigned long long cost1, cost2;
- int i;
-
- /*
- * Measure the migration cost of 'size' bytes, over an
- * average of 10 runs:
- *
- * (We perturb the cache size by a small (0..4k)
- * value to compensate size/alignment related artifacts.
- * We also subtract the cost of the operation done on
- * the same CPU.)
- */
- cost1 = 0;
-
- /*
- * dry run, to make sure we start off cache-cold on cpu1,
- * and to get any vmalloc pagefaults in advance:
- */
- measure_one(cache, size, cpu1, cpu2);
- for (i = 0; i < ITERATIONS; i++)
- cost1 += measure_one(cache, size - i * 1024, cpu1, cpu2);
-
- measure_one(cache, size, cpu2, cpu1);
- for (i = 0; i < ITERATIONS; i++)
- cost1 += measure_one(cache, size - i * 1024, cpu2, cpu1);
-
- /*
- * (We measure the non-migrating [cached] cost on both
- * cpu1 and cpu2, to handle CPUs with different speeds)
- */
- cost2 = 0;
-
- measure_one(cache, size, cpu1, cpu1);
- for (i = 0; i < ITERATIONS; i++)
- cost2 += measure_one(cache, size - i * 1024, cpu1, cpu1);
-
- measure_one(cache, size, cpu2, cpu2);
- for (i = 0; i < ITERATIONS; i++)
- cost2 += measure_one(cache, size - i * 1024, cpu2, cpu2);
-
- /*
- * Get the per-iteration migration cost:
- */
- do_div(cost1, 2 * ITERATIONS);
- do_div(cost2, 2 * ITERATIONS);
-
- return cost1 - cost2;
-}
-
-static unsigned long long measure_migration_cost(int cpu1, int cpu2)
-{
- unsigned long long max_cost = 0, fluct = 0, avg_fluct = 0;
- unsigned int max_size, size, size_found = 0;
- long long cost = 0, prev_cost;
- void *cache;
-
- /*
- * Search from max_cache_size*5 down to 64K - the real relevant
- * cachesize has to lie somewhere inbetween.
- */
- if (max_cache_size) {
- max_size = max(max_cache_size * SEARCH_SCOPE, MIN_CACHE_SIZE);
- size = max(max_cache_size / SEARCH_SCOPE, MIN_CACHE_SIZE);
- } else {
- /*
- * Since we have no estimation about the relevant
- * search range
- */
- max_size = DEFAULT_CACHE_SIZE * SEARCH_SCOPE;
- size = MIN_CACHE_SIZE;
- }
-
- if (!cpu_online(cpu1) || !cpu_online(cpu2)) {
- printk("cpu %d and %d not both online!\n", cpu1, cpu2);
- return 0;
- }
-
- /*
- * Allocate the working set:
- */
- cache = vmalloc(max_size);
- if (!cache) {
- printk("could not vmalloc %d bytes for cache!\n", 2 * max_size);
- return 1000000; /* return 1 msec on very small boxen */
- }
-
- while (size <= max_size) {
- prev_cost = cost;
- cost = measure_cost(cpu1, cpu2, cache, size);
-
- /*
- * Update the max:
- */
- if (cost > 0) {
- if (max_cost < cost) {
- max_cost = cost;
- size_found = size;
- }
- }
- /*
- * Calculate average fluctuation, we use this to prevent
- * noise from triggering an early break out of the loop:
- */
- fluct = abs(cost - prev_cost);
- avg_fluct = (avg_fluct + fluct)/2;
-
- if (migration_debug)
- printk("-> [%d][%d][%7d] %3ld.%ld [%3ld.%ld] (%ld): "
- "(%8Ld %8Ld)\n",
- cpu1, cpu2, size,
- (long)cost / 1000000,
- ((long)cost / 100000) % 10,
- (long)max_cost / 1000000,
- ((long)max_cost / 100000) % 10,
- domain_distance(cpu1, cpu2),
- cost, avg_fluct);
-
- /*
- * If we iterated at least 20% past the previous maximum,
- * and the cost has dropped by more than 20% already,
- * (taking fluctuations into account) then we assume to
- * have found the maximum and break out of the loop early:
- */
- if (size_found && (size*100 > size_found*SIZE_THRESH))
- if (cost+avg_fluct <= 0 ||
- max_cost*100 > (cost+avg_fluct)*COST_THRESH) {
-
- if (migration_debug)
- printk("-> found max.\n");
- break;
- }
- /*
- * Increase the cachesize in 10% steps:
- */
- size = size * 10 / 9;
- }
-
- if (migration_debug)
- printk("[%d][%d] working set size found: %d, cost: %Ld\n",
- cpu1, cpu2, size_found, max_cost);
-
- vfree(cache);
-
- /*
- * A task is considered 'cache cold' if at least 2 times
- * the worst-case cost of migration has passed.
- *
- * (this limit is only listened to if the load-balancing
- * situation is 'nice' - if there is a large imbalance we
- * ignore it for the sake of CPU utilization and
- * processing fairness.)
- */
- return 2 * max_cost * migration_factor / MIGRATION_FACTOR_SCALE;
-}
-
-static void calibrate_migration_costs(const cpumask_t *cpu_map)
-{
- int cpu1 = -1, cpu2 = -1, cpu, orig_cpu = raw_smp_processor_id();
- unsigned long j0, j1, distance, max_distance = 0;
- struct sched_domain *sd;
-
- j0 = jiffies;
-
- /*
- * First pass - calculate the cacheflush times:
- */
- for_each_cpu_mask(cpu1, *cpu_map) {
- for_each_cpu_mask(cpu2, *cpu_map) {
- if (cpu1 == cpu2)
- continue;
- distance = domain_distance(cpu1, cpu2);
- max_distance = max(max_distance, distance);
- /*
- * No result cached yet?
- */
- if (migration_cost[distance] == -1LL)
- migration_cost[distance] =
- measure_migration_cost(cpu1, cpu2);
- }
- }
- /*
- * Second pass - update the sched domain hierarchy with
- * the new cache-hot-time estimations:
- */
- for_each_cpu_mask(cpu, *cpu_map) {
- distance = 0;
- for_each_domain(cpu, sd) {
- sd->cache_hot_time = migration_cost[distance];
- distance++;
- }
- }
- /*
- * Print the matrix:
- */
- if (migration_debug)
- printk("migration: max_cache_size: %d, cpu: %d MHz:\n",
- max_cache_size,
-#ifdef CONFIG_X86
- cpu_khz/1000
-#else
- -1
-#endif
- );
- if (system_state == SYSTEM_BOOTING && num_online_cpus() > 1) {
- printk("migration_cost=");
- for (distance = 0; distance <= max_distance; distance++) {
- if (distance)
- printk(",");
- printk("%ld", (long)migration_cost[distance] / 1000);
- }
- printk("\n");
- }
- j1 = jiffies;
- if (migration_debug)
- printk("migration: %ld seconds\n", (j1-j0) / HZ);
-
- /*
- * Move back to the original CPU. NUMA-Q gets confused
- * if we migrate to another quad during bootup.
- */
- if (raw_smp_processor_id() != orig_cpu) {
- cpumask_t mask = cpumask_of_cpu(orig_cpu),
- saved_mask = current->cpus_allowed;
-
- set_cpus_allowed(current, mask);
- set_cpus_allowed(current, saved_mask);
- }
-}
-
#ifdef CONFIG_NUMA
/**
@@ -6574,7 +5791,6 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
static int build_sched_domains(const cpumask_t *cpu_map)
{
int i;
- struct sched_domain *sd;
#ifdef CONFIG_NUMA
struct sched_group **sched_group_nodes = NULL;
int sd_allnodes = 0;
@@ -6582,7 +5798,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
/*
* Allocate the per-node list of sched groups
*/
- sched_group_nodes = kzalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
+ sched_group_nodes = kzalloc(sizeof(struct sched_group *)*MAX_NUMNODES,
GFP_KERNEL);
if (!sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n");
@@ -6601,8 +5817,8 @@ static int build_sched_domains(const cpumask_t *cpu_map)
cpus_and(nodemask, nodemask, *cpu_map);
#ifdef CONFIG_NUMA
- if (cpus_weight(*cpu_map)
- > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
+ if (cpus_weight(*cpu_map) >
+ SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
sd = &per_cpu(allnodes_domains, i);
*sd = SD_ALLNODES_INIT;
sd->span = *cpu_map;
@@ -6661,7 +5877,8 @@ static int build_sched_domains(const cpumask_t *cpu_map)
if (i != first_cpu(this_sibling_map))
continue;
- init_sched_build_groups(this_sibling_map, cpu_map, &cpu_to_cpu_group);
+ init_sched_build_groups(this_sibling_map, cpu_map,
+ &cpu_to_cpu_group);
}
#endif
@@ -6672,11 +5889,11 @@ static int build_sched_domains(const cpumask_t *cpu_map)
cpus_and(this_core_map, this_core_map, *cpu_map);
if (i != first_cpu(this_core_map))
continue;
- init_sched_build_groups(this_core_map, cpu_map, &cpu_to_core_group);
+ init_sched_build_groups(this_core_map, cpu_map,
+ &cpu_to_core_group);
}
#endif
-
/* Set up physical groups */
for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t nodemask = node_to_cpumask(i);
@@ -6691,7 +5908,8 @@ static int build_sched_domains(const cpumask_t *cpu_map)
#ifdef CONFIG_NUMA
/* Set up node groups */
if (sd_allnodes)
- init_sched_build_groups(*cpu_map, cpu_map, &cpu_to_allnodes_group);
+ init_sched_build_groups(*cpu_map, cpu_map,
+ &cpu_to_allnodes_group);
for (i = 0; i < MAX_NUMNODES; i++) {
/* Set up node groups */
@@ -6719,6 +5937,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
sched_group_nodes[i] = sg;
for_each_cpu_mask(j, nodemask) {
struct sched_domain *sd;
+
sd = &per_cpu(node_domains, j);
sd->groups = sg;
}
@@ -6763,19 +5982,22 @@ static int build_sched_domains(const cpumask_t *cpu_map)
/* Calculate CPU power for physical packages and nodes */
#ifdef CONFIG_SCHED_SMT
for_each_cpu_mask(i, *cpu_map) {
- sd = &per_cpu(cpu_domains, i);
+ struct sched_domain *sd = &per_cpu(cpu_domains, i);
+
init_sched_groups_power(i, sd);
}
#endif
#ifdef CONFIG_SCHED_MC
for_each_cpu_mask(i, *cpu_map) {
- sd = &per_cpu(core_domains, i);
+ struct sched_domain *sd = &per_cpu(core_domains, i);
+
init_sched_groups_power(i, sd);
}
#endif
for_each_cpu_mask(i, *cpu_map) {
- sd = &per_cpu(phys_domains, i);
+ struct sched_domain *sd = &per_cpu(phys_domains, i);
+
init_sched_groups_power(i, sd);
}
@@ -6803,10 +6025,6 @@ static int build_sched_domains(const cpumask_t *cpu_map)
#endif
cpu_attach_domain(sd, i);
}
- /*
- * Tune cache-hot values:
- */
- calibrate_migration_costs(cpu_map);
return 0;
@@ -7013,10 +6231,12 @@ void __init sched_init_smp(void)
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed(current, non_isolated_cpus) < 0)
BUG();
+ sched_init_granularity();
}
#else
void __init sched_init_smp(void)
{
+ sched_init_granularity();
}
#endif /* CONFIG_SMP */
@@ -7030,28 +6250,51 @@ int in_sched_functions(unsigned long addr)
&& addr < (unsigned long)__sched_text_end);
}
+static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
+{
+ cfs_rq->tasks_timeline = RB_ROOT;
+ cfs_rq->fair_clock = 1;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ cfs_rq->rq = rq;
+#endif
+}
+
void __init sched_init(void)
{
- int i, j, k;
+ u64 now = sched_clock();
int highest_cpu = 0;
+ int i, j;
+
+ /*
+ * Link up the scheduling class hierarchy:
+ */
+ rt_sched_class.next = &fair_sched_class;
+ fair_sched_class.next = &idle_sched_class;
+ idle_sched_class.next = NULL;
for_each_possible_cpu(i) {
- struct prio_array *array;
+ struct rt_prio_array *array;
struct rq *rq;
rq = cpu_rq(i);
spin_lock_init(&rq->lock);
lockdep_set_class(&rq->lock, &rq->rq_lock_key);
rq->nr_running = 0;
- rq->active = rq->arrays;
- rq->expired = rq->arrays + 1;
- rq->best_expired_prio = MAX_PRIO;
+ rq->clock = 1;
+ init_cfs_rq(&rq->cfs, rq);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
+ list_add(&rq->cfs.leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+#endif
+ rq->ls.load_update_last = now;
+ rq->ls.load_update_start = now;
+ for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
+ rq->cpu_load[j] = 0;
#ifdef CONFIG_SMP
rq->sd = NULL;
- for (j = 1; j < 3; j++)
- rq->cpu_load[j] = 0;
rq->active_balance = 0;
+ rq->next_balance = jiffies;
rq->push_cpu = 0;
rq->cpu = i;
rq->migration_thread = NULL;
@@ -7059,16 +6302,14 @@ void __init sched_init(void)
#endif
atomic_set(&rq->nr_iowait, 0);
- for (j = 0; j < 2; j++) {
- array = rq->arrays + j;
- for (k = 0; k < MAX_PRIO; k++) {
- INIT_LIST_HEAD(array->queue + k);
- __clear_bit(k, array->bitmap);
- }
- // delimiter for bitsearch
- __set_bit(MAX_PRIO, array->bitmap);
+ array = &rq->rt.active;
+ for (j = 0; j < MAX_RT_PRIO; j++) {
+ INIT_LIST_HEAD(array->queue + j);
+ __clear_bit(j, array->bitmap);
}
highest_cpu = i;
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_RT_PRIO, array->bitmap);
}
set_load_weight(&init_task);
@@ -7095,6 +6336,10 @@ void __init sched_init(void)
* when this runqueue becomes "idle".
*/
init_idle(current, smp_processor_id());
+ /*
+ * During early bootup we pretend to be a normal task:
+ */
+ current->sched_class = &fair_sched_class;
}
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -7125,29 +6370,55 @@ EXPORT_SYMBOL(__might_sleep);
#ifdef CONFIG_MAGIC_SYSRQ
void normalize_rt_tasks(void)
{
- struct prio_array *array;
struct task_struct *g, *p;
unsigned long flags;
struct rq *rq;
+ int on_rq;
read_lock_irq(&tasklist_lock);
-
do_each_thread(g, p) {
- if (!rt_task(p))
+ p->se.fair_key = 0;
+ p->se.wait_runtime = 0;
+ p->se.wait_start_fair = 0;
+ p->se.wait_start = 0;
+ p->se.exec_start = 0;
+ p->se.sleep_start = 0;
+ p->se.sleep_start_fair = 0;
+ p->se.block_start = 0;
+ task_rq(p)->cfs.fair_clock = 0;
+ task_rq(p)->clock = 0;
+
+ if (!rt_task(p)) {
+ /*
+ * Renice negative nice level userspace
+ * tasks back to 0:
+ */
+ if (TASK_NICE(p) < 0 && p->mm)
+ set_user_nice(p, 0);
continue;
+ }
spin_lock_irqsave(&p->pi_lock, flags);
rq = __task_rq_lock(p);
+#ifdef CONFIG_SMP
+ /*
+ * Do not touch the migration thread:
+ */
+ if (p == rq->migration_thread)
+ goto out_unlock;
+#endif
- array = p->array;
- if (array)
- deactivate_task(p, task_rq(p));
- __setscheduler(p, SCHED_NORMAL, 0);
- if (array) {
- __activate_task(p, task_rq(p));
+ on_rq = p->se.on_rq;
+ if (on_rq)
+ deactivate_task(task_rq(p), p, 0);
+ __setscheduler(rq, p, SCHED_NORMAL, 0);
+ if (on_rq) {
+ activate_task(task_rq(p), p, 0);
resched_task(rq->curr);
}
-
+#ifdef CONFIG_SMP
+ out_unlock:
+#endif
__task_rq_unlock(rq);
spin_unlock_irqrestore(&p->pi_lock, flags);
} while_each_thread(g, p);
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
new file mode 100644
index 00000000000..1baf87cceb7
--- /dev/null
+++ b/kernel/sched_debug.c
@@ -0,0 +1,275 @@
+/*
+ * kernel/time/sched_debug.c
+ *
+ * Print the CFS rbtree
+ *
+ * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/utsname.h>
+
+/*
+ * This allows printing both to /proc/sched_debug and
+ * to the console
+ */
+#define SEQ_printf(m, x...) \
+ do { \
+ if (m) \
+ seq_printf(m, x); \
+ else \
+ printk(x); \
+ } while (0)
+
+static void
+print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now)
+{
+ if (rq->curr == p)
+ SEQ_printf(m, "R");
+ else
+ SEQ_printf(m, " ");
+
+ SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d "
+ "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
+ p->comm, p->pid,
+ (long long)p->se.fair_key,
+ (long long)(p->se.fair_key - rq->cfs.fair_clock),
+ (long long)p->se.wait_runtime,
+ (long long)(p->nvcsw + p->nivcsw),
+ p->prio,
+ (long long)p->se.sum_exec_runtime,
+ (long long)p->se.sum_wait_runtime,
+ (long long)p->se.sum_sleep_runtime,
+ (long long)p->se.wait_runtime_overruns,
+ (long long)p->se.wait_runtime_underruns);
+}
+
+static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now)
+{
+ struct task_struct *g, *p;
+
+ SEQ_printf(m,
+ "\nrunnable tasks:\n"
+ " task PID tree-key delta waiting"
+ " switches prio"
+ " sum-exec sum-wait sum-sleep"
+ " wait-overrun wait-underrun\n"
+ "------------------------------------------------------------------"
+ "----------------"
+ "------------------------------------------------"
+ "--------------------------------\n");
+
+ read_lock_irq(&tasklist_lock);
+
+ do_each_thread(g, p) {
+ if (!p->se.on_rq || task_cpu(p) != rq_cpu)
+ continue;
+
+ print_task(m, rq, p, now);
+ } while_each_thread(g, p);
+
+ read_unlock_irq(&tasklist_lock);
+}
+
+static void
+print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+{
+ s64 wait_runtime_rq_sum = 0;
+ struct task_struct *p;
+ struct rb_node *curr;
+ unsigned long flags;
+ struct rq *rq = &per_cpu(runqueues, cpu);
+
+ spin_lock_irqsave(&rq->lock, flags);
+ curr = first_fair(cfs_rq);
+ while (curr) {
+ p = rb_entry(curr, struct task_struct, se.run_node);
+ wait_runtime_rq_sum += p->se.wait_runtime;
+
+ curr = rb_next(curr);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
+
+ SEQ_printf(m, " .%-30s: %Ld\n", "wait_runtime_rq_sum",
+ (long long)wait_runtime_rq_sum);
+}
+
+void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
+{
+ SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq);
+
+#define P(x) \
+ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
+
+ P(fair_clock);
+ P(exec_clock);
+ P(wait_runtime);
+ P(wait_runtime_overruns);
+ P(wait_runtime_underruns);
+ P(sleeper_bonus);
+#undef P
+
+ print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
+}
+
+static void print_cpu(struct seq_file *m, int cpu, u64 now)
+{
+ struct rq *rq = &per_cpu(runqueues, cpu);
+
+#ifdef CONFIG_X86
+ {
+ unsigned int freq = cpu_khz ? : 1;
+
+ SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n",
+ cpu, freq / 1000, (freq % 1000));
+ }
+#else
+ SEQ_printf(m, "\ncpu#%d\n", cpu);
+#endif
+
+#define P(x) \
+ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x))
+
+ P(nr_running);
+ SEQ_printf(m, " .%-30s: %lu\n", "load",
+ rq->ls.load.weight);
+ P(ls.delta_fair);
+ P(ls.delta_exec);
+ P(nr_switches);
+ P(nr_load_updates);
+ P(nr_uninterruptible);
+ SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies);
+ P(next_balance);
+ P(curr->pid);
+ P(clock);
+ P(prev_clock_raw);
+ P(clock_warps);
+ P(clock_overflows);
+ P(clock_unstable_events);
+ P(clock_max_delta);
+ P(cpu_load[0]);
+ P(cpu_load[1]);
+ P(cpu_load[2]);
+ P(cpu_load[3]);
+ P(cpu_load[4]);
+#undef P
+
+ print_cfs_stats(m, cpu, now);
+
+ print_rq(m, rq, cpu, now);
+}
+
+static int sched_debug_show(struct seq_file *m, void *v)
+{
+ u64 now = ktime_to_ns(ktime_get());
+ int cpu;
+
+ SEQ_printf(m, "Sched Debug Version: v0.04, cfs-v20, %s %.*s\n",
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+
+ SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now);
+
+ for_each_online_cpu(cpu)
+ print_cpu(m, cpu, now);
+
+ SEQ_printf(m, "\n");
+
+ return 0;
+}
+
+void sysrq_sched_debug_show(void)
+{
+ sched_debug_show(NULL, NULL);
+}
+
+static int sched_debug_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_debug_show, NULL);
+}
+
+static struct file_operations sched_debug_fops = {
+ .open = sched_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init init_sched_debug_procfs(void)
+{
+ struct proc_dir_entry *pe;
+
+ pe = create_proc_entry("sched_debug", 0644, NULL);
+ if (!pe)
+ return -ENOMEM;
+
+ pe->proc_fops = &sched_debug_fops;
+
+ return 0;
+}
+
+__initcall(init_sched_debug_procfs);
+
+void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+{
+ unsigned long flags;
+ int num_threads = 1;
+
+ rcu_read_lock();
+ if (lock_task_sighand(p, &flags)) {
+ num_threads = atomic_read(&p->signal->count);
+ unlock_task_sighand(p, &flags);
+ }
+ rcu_read_unlock();
+
+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads);
+ SEQ_printf(m, "----------------------------------------------\n");
+#define P(F) \
+ SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F)
+
+ P(se.wait_start);
+ P(se.wait_start_fair);
+ P(se.exec_start);
+ P(se.sleep_start);
+ P(se.sleep_start_fair);
+ P(se.block_start);
+ P(se.sleep_max);
+ P(se.block_max);
+ P(se.exec_max);
+ P(se.wait_max);
+ P(se.wait_runtime);
+ P(se.wait_runtime_overruns);
+ P(se.wait_runtime_underruns);
+ P(se.sum_wait_runtime);
+ P(se.sum_exec_runtime);
+ SEQ_printf(m, "%-25s:%20Ld\n",
+ "nr_switches", (long long)(p->nvcsw + p->nivcsw));
+ P(se.load.weight);
+ P(policy);
+ P(prio);
+#undef P
+
+ {
+ u64 t0, t1;
+
+ t0 = sched_clock();
+ t1 = sched_clock();
+ SEQ_printf(m, "%-25s:%20Ld\n",
+ "clock-delta", (long long)(t1-t0));
+ }
+}
+
+void proc_sched_set_task(struct task_struct *p)
+{
+ p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0;
+ p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
+ p->se.sum_exec_runtime = 0;
+}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
new file mode 100644
index 00000000000..6971db0a716
--- /dev/null
+++ b/kernel/sched_fair.c
@@ -0,0 +1,1131 @@
+/*
+ * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
+ *
+ * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Interactivity improvements by Mike Galbraith
+ * (C) 2007 Mike Galbraith <efault@gmx.de>
+ *
+ * Various enhancements by Dmitry Adamushko.
+ * (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
+ *
+ * Group scheduling enhancements by Srivatsa Vaddagiri
+ * Copyright IBM Corporation, 2007
+ * Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
+ *
+ * Scaled math optimizations by Thomas Gleixner
+ * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
+ */
+
+/*
+ * Preemption granularity:
+ * (default: 2 msec, units: nanoseconds)
+ *
+ * NOTE: this granularity value is not the same as the concept of
+ * 'timeslice length' - timeslices in CFS will typically be somewhat
+ * larger than this value. (to see the precise effective timeslice
+ * length of your workload, run vmstat and monitor the context-switches
+ * field)
+ *
+ * On SMP systems the value of this is multiplied by the log2 of the
+ * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
+ * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
+ */
+unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ;
+
+/*
+ * SCHED_BATCH wake-up granularity.
+ * (default: 10 msec, units: nanoseconds)
+ *
+ * This option delays the preemption effects of decoupled workloads
+ * and reduces their over-scheduling. Synchronous workloads will still
+ * have immediate wakeup/sleep latencies.
+ */
+unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
+ 10000000000ULL/HZ;
+
+/*
+ * SCHED_OTHER wake-up granularity.
+ * (default: 1 msec, units: nanoseconds)
+ *
+ * This option delays the preemption effects of decoupled workloads
+ * and reduces their over-scheduling. Synchronous workloads will still
+ * have immediate wakeup/sleep latencies.
+ */
+unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ;
+
+unsigned int sysctl_sched_stat_granularity __read_mostly;
+
+/*
+ * Initialized in sched_init_granularity():
+ */
+unsigned int sysctl_sched_runtime_limit __read_mostly;
+
+/*
+ * Debugging: various feature bits
+ */
+enum {
+ SCHED_FEAT_FAIR_SLEEPERS = 1,
+ SCHED_FEAT_SLEEPER_AVG = 2,
+ SCHED_FEAT_SLEEPER_LOAD_AVG = 4,
+ SCHED_FEAT_PRECISE_CPU_LOAD = 8,
+ SCHED_FEAT_START_DEBIT = 16,
+ SCHED_FEAT_SKIP_INITIAL = 32,
+};
+
+unsigned int sysctl_sched_features __read_mostly =
+ SCHED_FEAT_FAIR_SLEEPERS *1 |
+ SCHED_FEAT_SLEEPER_AVG *1 |
+ SCHED_FEAT_SLEEPER_LOAD_AVG *1 |
+ SCHED_FEAT_PRECISE_CPU_LOAD *1 |
+ SCHED_FEAT_START_DEBIT *1 |
+ SCHED_FEAT_SKIP_INITIAL *0;
+
+extern struct sched_class fair_sched_class;
+
+/**************************************************************
+ * CFS operations on generic schedulable entities:
+ */
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+/* cpu runqueue to which this cfs_rq is attached */
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->rq;
+}
+
+/* currently running entity (if any) on this cfs_rq */
+static inline struct sched_entity *cfs_rq_curr(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->curr;
+}
+
+/* An entity is a task if it doesn't "own" a runqueue */
+#define entity_is_task(se) (!se->my_q)
+
+static inline void
+set_cfs_rq_curr(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ cfs_rq->curr = se;
+}
+
+#else /* CONFIG_FAIR_GROUP_SCHED */
+
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+ return container_of(cfs_rq, struct rq, cfs);
+}
+
+static inline struct sched_entity *cfs_rq_curr(struct cfs_rq *cfs_rq)
+{
+ struct rq *rq = rq_of(cfs_rq);
+
+ if (unlikely(rq->curr->sched_class != &fair_sched_class))
+ return NULL;
+
+ return &rq->curr->se;
+}
+
+#define entity_is_task(se) 1
+
+static inline void
+set_cfs_rq_curr(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
+
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+ return container_of(se, struct task_struct, se);
+}
+
+
+/**************************************************************
+ * Scheduling class tree data structure manipulation methods:
+ */
+
+/*
+ * Enqueue an entity into the rb-tree:
+ */
+static inline void
+__enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
+ struct rb_node *parent = NULL;
+ struct sched_entity *entry;
+ s64 key = se->fair_key;
+ int leftmost = 1;
+
+ /*
+ * Find the right place in the rbtree:
+ */
+ while (*link) {
+ parent = *link;
+ entry = rb_entry(parent, struct sched_entity, run_node);
+ /*
+ * We dont care about collisions. Nodes with
+ * the same key stay together.
+ */
+ if (key - entry->fair_key < 0) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+ leftmost = 0;
+ }
+ }
+
+ /*
+ * Maintain a cache of leftmost tree entries (it is frequently
+ * used):
+ */
+ if (leftmost)
+ cfs_rq->rb_leftmost = &se->run_node;
+
+ rb_link_node(&se->run_node, parent, link);
+ rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
+ update_load_add(&cfs_rq->load, se->load.weight);
+ cfs_rq->nr_running++;
+ se->on_rq = 1;
+}
+
+static inline void
+__dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ if (cfs_rq->rb_leftmost == &se->run_node)
+ cfs_rq->rb_leftmost = rb_next(&se->run_node);
+ rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
+ update_load_sub(&cfs_rq->load, se->load.weight);
+ cfs_rq->nr_running--;
+ se->on_rq = 0;
+}
+
+static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->rb_leftmost;
+}
+
+static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
+{
+ return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
+}
+
+/**************************************************************
+ * Scheduling class statistics methods:
+ */
+
+/*
+ * We rescale the rescheduling granularity of tasks according to their
+ * nice level, but only linearly, not exponentially:
+ */
+static long
+niced_granularity(struct sched_entity *curr, unsigned long granularity)
+{
+ u64 tmp;
+
+ /*
+ * Negative nice levels get the same granularity as nice-0:
+ */
+ if (likely(curr->load.weight >= NICE_0_LOAD))
+ return granularity;
+ /*
+ * Positive nice level tasks get linearly finer
+ * granularity:
+ */
+ tmp = curr->load.weight * (u64)granularity;
+
+ /*
+ * It will always fit into 'long':
+ */
+ return (long) (tmp >> NICE_0_SHIFT);
+}
+
+static inline void
+limit_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ long limit = sysctl_sched_runtime_limit;
+
+ /*
+ * Niced tasks have the same history dynamic range as
+ * non-niced tasks:
+ */
+ if (unlikely(se->wait_runtime > limit)) {
+ se->wait_runtime = limit;
+ schedstat_inc(se, wait_runtime_overruns);
+ schedstat_inc(cfs_rq, wait_runtime_overruns);
+ }
+ if (unlikely(se->wait_runtime < -limit)) {
+ se->wait_runtime = -limit;
+ schedstat_inc(se, wait_runtime_underruns);
+ schedstat_inc(cfs_rq, wait_runtime_underruns);
+ }
+}
+
+static inline void
+__add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
+{
+ se->wait_runtime += delta;
+ schedstat_add(se, sum_wait_runtime, delta);
+ limit_wait_runtime(cfs_rq, se);
+}
+
+static void
+add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
+{
+ schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
+ __add_wait_runtime(cfs_rq, se, delta);
+ schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+}
+
+/*
+ * Update the current task's runtime statistics. Skip current tasks that
+ * are not in our scheduling class.
+ */
+static inline void
+__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now)
+{
+ unsigned long delta, delta_exec, delta_fair;
+ long delta_mine;
+ struct load_weight *lw = &cfs_rq->load;
+ unsigned long load = lw->weight;
+
+ if (unlikely(!load))
+ return;
+
+ delta_exec = curr->delta_exec;
+#ifdef CONFIG_SCHEDSTATS
+ if (unlikely(delta_exec > curr->exec_max))
+ curr->exec_max = delta_exec;
+#endif
+
+ curr->sum_exec_runtime += delta_exec;
+ cfs_rq->exec_clock += delta_exec;
+
+ delta_fair = calc_delta_fair(delta_exec, lw);
+ delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
+
+ if (cfs_rq->sleeper_bonus > sysctl_sched_stat_granularity) {
+ delta = calc_delta_mine(cfs_rq->sleeper_bonus,
+ curr->load.weight, lw);
+ if (unlikely(delta > cfs_rq->sleeper_bonus))
+ delta = cfs_rq->sleeper_bonus;
+
+ cfs_rq->sleeper_bonus -= delta;
+ delta_mine -= delta;
+ }
+
+ cfs_rq->fair_clock += delta_fair;
+ /*
+ * We executed delta_exec amount of time on the CPU,
+ * but we were only entitled to delta_mine amount of
+ * time during that period (if nr_running == 1 then
+ * the two values are equal)
+ * [Note: delta_mine - delta_exec is negative]:
+ */
+ add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec);
+}
+
+static void update_curr(struct cfs_rq *cfs_rq, u64 now)
+{
+ struct sched_entity *curr = cfs_rq_curr(cfs_rq);
+ unsigned long delta_exec;
+
+ if (unlikely(!curr))
+ return;
+
+ /*
+ * Get the amount of time the current task was running
+ * since the last time we changed load (this cannot
+ * overflow on 32 bits):
+ */
+ delta_exec = (unsigned long)(now - curr->exec_start);
+
+ curr->delta_exec += delta_exec;
+
+ if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) {
+ __update_curr(cfs_rq, curr, now);
+ curr->delta_exec = 0;
+ }
+ curr->exec_start = now;
+}
+
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ se->wait_start_fair = cfs_rq->fair_clock;
+ se->wait_start = now;
+}
+
+/*
+ * We calculate fair deltas here, so protect against the random effects
+ * of a multiplication overflow by capping it to the runtime limit:
+ */
+#if BITS_PER_LONG == 32
+static inline unsigned long
+calc_weighted(unsigned long delta, unsigned long weight, int shift)
+{
+ u64 tmp = (u64)delta * weight >> shift;
+
+ if (unlikely(tmp > sysctl_sched_runtime_limit*2))
+ return sysctl_sched_runtime_limit*2;
+ return tmp;
+}
+#else
+static inline unsigned long
+calc_weighted(unsigned long delta, unsigned long weight, int shift)
+{
+ return delta * weight >> shift;
+}
+#endif
+
+/*
+ * Task is being enqueued - update stats:
+ */
+static void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ s64 key;
+
+ /*
+ * Are we enqueueing a waiting task? (for current tasks
+ * a dequeue/enqueue event is a NOP)
+ */
+ if (se != cfs_rq_curr(cfs_rq))
+ update_stats_wait_start(cfs_rq, se, now);
+ /*
+ * Update the key:
+ */
+ key = cfs_rq->fair_clock;
+
+ /*
+ * Optimize the common nice 0 case:
+ */
+ if (likely(se->load.weight == NICE_0_LOAD)) {
+ key -= se->wait_runtime;
+ } else {
+ u64 tmp;
+
+ if (se->wait_runtime < 0) {
+ tmp = -se->wait_runtime;
+ key += (tmp * se->load.inv_weight) >>
+ (WMULT_SHIFT - NICE_0_SHIFT);
+ } else {
+ tmp = se->wait_runtime;
+ key -= (tmp * se->load.weight) >> NICE_0_SHIFT;
+ }
+ }
+
+ se->fair_key = key;
+}
+
+/*
+ * Note: must be called with a freshly updated rq->fair_clock.
+ */
+static inline void
+__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ unsigned long delta_fair = se->delta_fair_run;
+
+#ifdef CONFIG_SCHEDSTATS
+ {
+ s64 delta_wait = now - se->wait_start;
+ if (unlikely(delta_wait > se->wait_max))
+ se->wait_max = delta_wait;
+ }
+#endif
+
+ if (unlikely(se->load.weight != NICE_0_LOAD))
+ delta_fair = calc_weighted(delta_fair, se->load.weight,
+ NICE_0_SHIFT);
+
+ add_wait_runtime(cfs_rq, se, delta_fair);
+}
+
+static void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ unsigned long delta_fair;
+
+ delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
+ (u64)(cfs_rq->fair_clock - se->wait_start_fair));
+
+ se->delta_fair_run += delta_fair;
+ if (unlikely(abs(se->delta_fair_run) >=
+ sysctl_sched_stat_granularity)) {
+ __update_stats_wait_end(cfs_rq, se, now);
+ se->delta_fair_run = 0;
+ }
+
+ se->wait_start_fair = 0;
+ se->wait_start = 0;
+}
+
+static inline void
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ update_curr(cfs_rq, now);
+ /*
+ * Mark the end of the wait period if dequeueing a
+ * waiting task:
+ */
+ if (se != cfs_rq_curr(cfs_rq))
+ update_stats_wait_end(cfs_rq, se, now);
+}
+
+/*
+ * We are picking a new current task - update its stats:
+ */
+static inline void
+update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ /*
+ * We are starting a new run period:
+ */
+ se->exec_start = now;
+}
+
+/*
+ * We are descheduling a task - update its stats:
+ */
+static inline void
+update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ se->exec_start = 0;
+}
+
+/**************************************************
+ * Scheduling class queueing methods:
+ */
+
+static void
+__enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ unsigned long load = cfs_rq->load.weight, delta_fair;
+ long prev_runtime;
+
+ if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG)
+ load = rq_of(cfs_rq)->cpu_load[2];
+
+ delta_fair = se->delta_fair_sleep;
+
+ /*
+ * Fix up delta_fair with the effect of us running
+ * during the whole sleep period:
+ */
+ if (sysctl_sched_features & SCHED_FEAT_SLEEPER_AVG)
+ delta_fair = div64_likely32((u64)delta_fair * load,
+ load + se->load.weight);
+
+ if (unlikely(se->load.weight != NICE_0_LOAD))
+ delta_fair = calc_weighted(delta_fair, se->load.weight,
+ NICE_0_SHIFT);
+
+ prev_runtime = se->wait_runtime;
+ __add_wait_runtime(cfs_rq, se, delta_fair);
+ delta_fair = se->wait_runtime - prev_runtime;
+
+ /*
+ * Track the amount of bonus we've given to sleepers:
+ */
+ cfs_rq->sleeper_bonus += delta_fair;
+
+ schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+}
+
+static void
+enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ struct task_struct *tsk = task_of(se);
+ unsigned long delta_fair;
+
+ if ((entity_is_task(se) && tsk->policy == SCHED_BATCH) ||
+ !(sysctl_sched_features & SCHED_FEAT_FAIR_SLEEPERS))
+ return;
+
+ delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
+ (u64)(cfs_rq->fair_clock - se->sleep_start_fair));
+
+ se->delta_fair_sleep += delta_fair;
+ if (unlikely(abs(se->delta_fair_sleep) >=
+ sysctl_sched_stat_granularity)) {
+ __enqueue_sleeper(cfs_rq, se, now);
+ se->delta_fair_sleep = 0;
+ }
+
+ se->sleep_start_fair = 0;
+
+#ifdef CONFIG_SCHEDSTATS
+ if (se->sleep_start) {
+ u64 delta = now - se->sleep_start;
+
+ if ((s64)delta < 0)
+ delta = 0;
+
+ if (unlikely(delta > se->sleep_max))
+ se->sleep_max = delta;
+
+ se->sleep_start = 0;
+ se->sum_sleep_runtime += delta;
+ }
+ if (se->block_start) {
+ u64 delta = now - se->block_start;
+
+ if ((s64)delta < 0)
+ delta = 0;
+
+ if (unlikely(delta > se->block_max))
+ se->block_max = delta;
+
+ se->block_start = 0;
+ se->sum_sleep_runtime += delta;
+ }
+#endif
+}
+
+static void
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+ int wakeup, u64 now)
+{
+ /*
+ * Update the fair clock.
+ */
+ update_curr(cfs_rq, now);
+
+ if (wakeup)
+ enqueue_sleeper(cfs_rq, se, now);
+
+ update_stats_enqueue(cfs_rq, se, now);
+ __enqueue_entity(cfs_rq, se);
+}
+
+static void
+dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+ int sleep, u64 now)
+{
+ update_stats_dequeue(cfs_rq, se, now);
+ if (sleep) {
+ se->sleep_start_fair = cfs_rq->fair_clock;
+#ifdef CONFIG_SCHEDSTATS
+ if (entity_is_task(se)) {
+ struct task_struct *tsk = task_of(se);
+
+ if (tsk->state & TASK_INTERRUPTIBLE)
+ se->sleep_start = now;
+ if (tsk->state & TASK_UNINTERRUPTIBLE)
+ se->block_start = now;
+ }
+ cfs_rq->wait_runtime -= se->wait_runtime;
+#endif
+ }
+ __dequeue_entity(cfs_rq, se);
+}
+
+/*
+ * Preempt the current task with a newly woken task if needed:
+ */
+static void
+__check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
+ struct sched_entity *curr, unsigned long granularity)
+{
+ s64 __delta = curr->fair_key - se->fair_key;
+
+ /*
+ * Take scheduling granularity into account - do not
+ * preempt the current task unless the best task has
+ * a larger than sched_granularity fairness advantage:
+ */
+ if (__delta > niced_granularity(curr, granularity))
+ resched_task(rq_of(cfs_rq)->curr);
+}
+
+static inline void
+set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+{
+ /*
+ * Any task has to be enqueued before it get to execute on
+ * a CPU. So account for the time it spent waiting on the
+ * runqueue. (note, here we rely on pick_next_task() having
+ * done a put_prev_task_fair() shortly before this, which
+ * updated rq->fair_clock - used by update_stats_wait_end())
+ */
+ update_stats_wait_end(cfs_rq, se, now);
+ update_stats_curr_start(cfs_rq, se, now);
+ set_cfs_rq_curr(cfs_rq, se);
+}
+
+static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq, u64 now)
+{
+ struct sched_entity *se = __pick_next_entity(cfs_rq);
+
+ set_next_entity(cfs_rq, se, now);
+
+ return se;
+}
+
+static void
+put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev, u64 now)
+{
+ /*
+ * If still on the runqueue then deactivate_task()
+ * was not called and update_curr() has to be done:
+ */
+ if (prev->on_rq)
+ update_curr(cfs_rq, now);
+
+ update_stats_curr_end(cfs_rq, prev, now);
+
+ if (prev->on_rq)
+ update_stats_wait_start(cfs_rq, prev, now);
+ set_cfs_rq_curr(cfs_rq, NULL);
+}
+
+static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+{
+ struct rq *rq = rq_of(cfs_rq);
+ struct sched_entity *next;
+ u64 now = __rq_clock(rq);
+
+ /*
+ * Dequeue and enqueue the task to update its
+ * position within the tree:
+ */
+ dequeue_entity(cfs_rq, curr, 0, now);
+ enqueue_entity(cfs_rq, curr, 0, now);
+
+ /*
+ * Reschedule if another task tops the current one.
+ */
+ next = __pick_next_entity(cfs_rq);
+ if (next == curr)
+ return;
+
+ __check_preempt_curr_fair(cfs_rq, next, curr, sysctl_sched_granularity);
+}
+
+/**************************************************
+ * CFS operations on tasks:
+ */
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+/* Walk up scheduling entities hierarchy */
+#define for_each_sched_entity(se) \
+ for (; se; se = se->parent)
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+ return p->se.cfs_rq;
+}
+
+/* runqueue on which this entity is (to be) queued */
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+ return se->cfs_rq;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+ return grp->my_q;
+}
+
+/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
+ * another cpu ('this_cpu')
+ */
+static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
+{
+ /* A later patch will take group into account */
+ return &cpu_rq(this_cpu)->cfs;
+}
+
+/* Iterate thr' all leaf cfs_rq's on a runqueue */
+#define for_each_leaf_cfs_rq(rq, cfs_rq) \
+ list_for_each_entry(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+
+/* Do the two (enqueued) tasks belong to the same group ? */
+static inline int is_same_group(struct task_struct *curr, struct task_struct *p)
+{
+ if (curr->se.cfs_rq == p->se.cfs_rq)
+ return 1;
+
+ return 0;
+}
+
+#else /* CONFIG_FAIR_GROUP_SCHED */
+
+#define for_each_sched_entity(se) \
+ for (; se; se = NULL)
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+ return &task_rq(p)->cfs;
+}
+
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+ struct task_struct *p = task_of(se);
+ struct rq *rq = task_rq(p);
+
+ return &rq->cfs;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+ return NULL;
+}
+
+static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
+{
+ return &cpu_rq(this_cpu)->cfs;
+}
+
+#define for_each_leaf_cfs_rq(rq, cfs_rq) \
+ for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
+
+static inline int is_same_group(struct task_struct *curr, struct task_struct *p)
+{
+ return 1;
+}
+
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+/*
+ * The enqueue_task method is called before nr_running is
+ * increased. Here we update the fair scheduling stats and
+ * then put the task into the rbtree:
+ */
+static void
+enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+{
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+
+ for_each_sched_entity(se) {
+ if (se->on_rq)
+ break;
+ cfs_rq = cfs_rq_of(se);
+ enqueue_entity(cfs_rq, se, wakeup, now);
+ }
+}
+
+/*
+ * The dequeue_task method is called before nr_running is
+ * decreased. We remove the task from the rbtree and
+ * update the fair scheduling stats:
+ */
+static void
+dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+{
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ dequeue_entity(cfs_rq, se, sleep, now);
+ /* Don't dequeue parent if it has other entities besides us */
+ if (cfs_rq->load.weight)
+ break;
+ }
+}
+
+/*
+ * sched_yield() support is very simple - we dequeue and enqueue
+ */
+static void yield_task_fair(struct rq *rq, struct task_struct *p)
+{
+ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+ u64 now = __rq_clock(rq);
+
+ /*
+ * Dequeue and enqueue the task to update its
+ * position within the tree:
+ */
+ dequeue_entity(cfs_rq, &p->se, 0, now);
+ enqueue_entity(cfs_rq, &p->se, 0, now);
+}
+
+/*
+ * Preempt the current task with a newly woken task if needed:
+ */
+static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
+{
+ struct task_struct *curr = rq->curr;
+ struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+ unsigned long gran;
+
+ if (unlikely(rt_prio(p->prio))) {
+ update_curr(cfs_rq, rq_clock(rq));
+ resched_task(curr);
+ return;
+ }
+
+ gran = sysctl_sched_wakeup_granularity;
+ /*
+ * Batch tasks prefer throughput over latency:
+ */
+ if (unlikely(p->policy == SCHED_BATCH))
+ gran = sysctl_sched_batch_wakeup_granularity;
+
+ if (is_same_group(curr, p))
+ __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran);
+}
+
+static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
+{
+ struct cfs_rq *cfs_rq = &rq->cfs;
+ struct sched_entity *se;
+
+ if (unlikely(!cfs_rq->nr_running))
+ return NULL;
+
+ do {
+ se = pick_next_entity(cfs_rq, now);
+ cfs_rq = group_cfs_rq(se);
+ } while (cfs_rq);
+
+ return task_of(se);
+}
+
+/*
+ * Account for a descheduled task:
+ */
+static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now)
+{
+ struct sched_entity *se = &prev->se;
+ struct cfs_rq *cfs_rq;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ put_prev_entity(cfs_rq, se, now);
+ }
+}
+
+/**************************************************
+ * Fair scheduling class load-balancing methods:
+ */
+
+/*
+ * Load-balancing iterator. Note: while the runqueue stays locked
+ * during the whole iteration, the current task might be
+ * dequeued so the iterator has to be dequeue-safe. Here we
+ * achieve that by always pre-iterating before returning
+ * the current task:
+ */
+static inline struct task_struct *
+__load_balance_iterator(struct cfs_rq *cfs_rq, struct rb_node *curr)
+{
+ struct task_struct *p;
+
+ if (!curr)
+ return NULL;
+
+ p = rb_entry(curr, struct task_struct, se.run_node);
+ cfs_rq->rb_load_balance_curr = rb_next(curr);
+
+ return p;
+}
+
+static struct task_struct *load_balance_start_fair(void *arg)
+{
+ struct cfs_rq *cfs_rq = arg;
+
+ return __load_balance_iterator(cfs_rq, first_fair(cfs_rq));
+}
+
+static struct task_struct *load_balance_next_fair(void *arg)
+{
+ struct cfs_rq *cfs_rq = arg;
+
+ return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
+}
+
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+{
+ struct sched_entity *curr;
+ struct task_struct *p;
+
+ if (!cfs_rq->nr_running)
+ return MAX_PRIO;
+
+ curr = __pick_next_entity(cfs_rq);
+ p = task_of(curr);
+
+ return p->prio;
+}
+
+static int
+load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, unsigned long *total_load_moved)
+{
+ struct cfs_rq *busy_cfs_rq;
+ unsigned long load_moved, total_nr_moved = 0, nr_moved;
+ long rem_load_move = max_load_move;
+ struct rq_iterator cfs_rq_iterator;
+
+ cfs_rq_iterator.start = load_balance_start_fair;
+ cfs_rq_iterator.next = load_balance_next_fair;
+
+ for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+ struct cfs_rq *this_cfs_rq;
+ long imbalance;
+ unsigned long maxload;
+ int this_best_prio, best_prio, best_prio_seen = 0;
+
+ this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
+
+ imbalance = busy_cfs_rq->load.weight -
+ this_cfs_rq->load.weight;
+ /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+ if (imbalance <= 0)
+ continue;
+
+ /* Don't pull more than imbalance/2 */
+ imbalance /= 2;
+ maxload = min(rem_load_move, imbalance);
+
+ this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+ best_prio = cfs_rq_best_prio(busy_cfs_rq);
+
+ /*
+ * Enable handling of the case where there is more than one task
+ * with the best priority. If the current running task is one
+ * of those with prio==best_prio we know it won't be moved
+ * and therefore it's safe to override the skip (based on load)
+ * of any task we find with that prio.
+ */
+ if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se)
+ best_prio_seen = 1;
+
+ /* pass busy_cfs_rq argument into
+ * load_balance_[start|next]_fair iterators
+ */
+ cfs_rq_iterator.arg = busy_cfs_rq;
+ nr_moved = balance_tasks(this_rq, this_cpu, busiest,
+ max_nr_move, maxload, sd, idle, all_pinned,
+ &load_moved, this_best_prio, best_prio,
+ best_prio_seen, &cfs_rq_iterator);
+
+ total_nr_moved += nr_moved;
+ max_nr_move -= nr_moved;
+ rem_load_move -= load_moved;
+
+ if (max_nr_move <= 0 || rem_load_move <= 0)
+ break;
+ }
+
+ *total_load_moved = max_load_move - rem_load_move;
+
+ return total_nr_moved;
+}
+
+/*
+ * scheduler tick hitting a task of our scheduling class:
+ */
+static void task_tick_fair(struct rq *rq, struct task_struct *curr)
+{
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &curr->se;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ entity_tick(cfs_rq, se);
+ }
+}
+
+/*
+ * Share the fairness runtime between parent and child, thus the
+ * total amount of pressure for CPU stays equal - new tasks
+ * get a chance to run but frequent forkers are not allowed to
+ * monopolize the CPU. Note: the parent runqueue is locked,
+ * the child is not running yet.
+ */
+static void task_new_fair(struct rq *rq, struct task_struct *p)
+{
+ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+ struct sched_entity *se = &p->se;
+ u64 now = rq_clock(rq);
+
+ sched_info_queued(p);
+
+ update_stats_enqueue(cfs_rq, se, now);
+ /*
+ * Child runs first: we let it run before the parent
+ * until it reschedules once. We set up the key so that
+ * it will preempt the parent:
+ */
+ p->se.fair_key = current->se.fair_key -
+ niced_granularity(&rq->curr->se, sysctl_sched_granularity) - 1;
+ /*
+ * The first wait is dominated by the child-runs-first logic,
+ * so do not credit it with that waiting time yet:
+ */
+ if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL)
+ p->se.wait_start_fair = 0;
+
+ /*
+ * The statistical average of wait_runtime is about
+ * -granularity/2, so initialize the task with that:
+ */
+ if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
+ p->se.wait_runtime = -(sysctl_sched_granularity / 2);
+
+ __enqueue_entity(cfs_rq, se);
+ inc_nr_running(p, rq, now);
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/* Account for a task changing its policy or group.
+ *
+ * This routine is mostly called to set cfs_rq->curr field when a task
+ * migrates between groups/classes.
+ */
+static void set_curr_task_fair(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ struct sched_entity *se = &curr->se;
+ u64 now = rq_clock(rq);
+ struct cfs_rq *cfs_rq;
+
+ for_each_sched_entity(se) {
+ cfs_rq = cfs_rq_of(se);
+ set_next_entity(cfs_rq, se, now);
+ }
+}
+#else
+static void set_curr_task_fair(struct rq *rq)
+{
+}
+#endif
+
+/*
+ * All the scheduling class methods:
+ */
+struct sched_class fair_sched_class __read_mostly = {
+ .enqueue_task = enqueue_task_fair,
+ .dequeue_task = dequeue_task_fair,
+ .yield_task = yield_task_fair,
+
+ .check_preempt_curr = check_preempt_curr_fair,
+
+ .pick_next_task = pick_next_task_fair,
+ .put_prev_task = put_prev_task_fair,
+
+ .load_balance = load_balance_fair,
+
+ .set_curr_task = set_curr_task_fair,
+ .task_tick = task_tick_fair,
+ .task_new = task_new_fair,
+};
+
+#ifdef CONFIG_SCHED_DEBUG
+void print_cfs_stats(struct seq_file *m, int cpu, u64 now)
+{
+ struct rq *rq = cpu_rq(cpu);
+ struct cfs_rq *cfs_rq;
+
+ for_each_leaf_cfs_rq(rq, cfs_rq)
+ print_cfs_rq(m, cpu, cfs_rq, now);
+}
+#endif
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
new file mode 100644
index 00000000000..41841e741c4
--- /dev/null
+++ b/kernel/sched_idletask.c
@@ -0,0 +1,71 @@
+/*
+ * idle-task scheduling class.
+ *
+ * (NOTE: these are not related to SCHED_IDLE tasks which are
+ * handled in sched_fair.c)
+ */
+
+/*
+ * Idle tasks are unconditionally rescheduled:
+ */
+static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
+{
+ resched_task(rq->idle);
+}
+
+static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
+{
+ schedstat_inc(rq, sched_goidle);
+
+ return rq->idle;
+}
+
+/*
+ * It is not legal to sleep in the idle task - print a warning
+ * message if some code attempts to do it:
+ */
+static void
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+{
+ spin_unlock_irq(&rq->lock);
+ printk(KERN_ERR "bad: scheduling from the idle thread!\n");
+ dump_stack();
+ spin_lock_irq(&rq->lock);
+}
+
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
+{
+}
+
+static int
+load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, unsigned long *total_load_moved)
+{
+ return 0;
+}
+
+static void task_tick_idle(struct rq *rq, struct task_struct *curr)
+{
+}
+
+/*
+ * Simple, special scheduling class for the per-CPU idle tasks:
+ */
+static struct sched_class idle_sched_class __read_mostly = {
+ /* no enqueue/yield_task for idle tasks */
+
+ /* dequeue is not valid, we print a debug message there: */
+ .dequeue_task = dequeue_task_idle,
+
+ .check_preempt_curr = check_preempt_curr_idle,
+
+ .pick_next_task = pick_next_task_idle,
+ .put_prev_task = put_prev_task_idle,
+
+ .load_balance = load_balance_idle,
+
+ .task_tick = task_tick_idle,
+ /* no .task_new for idle tasks */
+};
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
new file mode 100644
index 00000000000..1192a2741b9
--- /dev/null
+++ b/kernel/sched_rt.c
@@ -0,0 +1,255 @@
+/*
+ * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
+ * policies)
+ */
+
+/*
+ * Update the current task's runtime statistics. Skip current tasks that
+ * are not in our scheduling class.
+ */
+static inline void update_curr_rt(struct rq *rq, u64 now)
+{
+ struct task_struct *curr = rq->curr;
+ u64 delta_exec;
+
+ if (!task_has_rt_policy(curr))
+ return;
+
+ delta_exec = now - curr->se.exec_start;
+ if (unlikely((s64)delta_exec < 0))
+ delta_exec = 0;
+ if (unlikely(delta_exec > curr->se.exec_max))
+ curr->se.exec_max = delta_exec;
+
+ curr->se.sum_exec_runtime += delta_exec;
+ curr->se.exec_start = now;
+}
+
+static void
+enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+{
+ struct rt_prio_array *array = &rq->rt.active;
+
+ list_add_tail(&p->run_list, array->queue + p->prio);
+ __set_bit(p->prio, array->bitmap);
+}
+
+/*
+ * Adding/removing a task to/from a priority array:
+ */
+static void
+dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+{
+ struct rt_prio_array *array = &rq->rt.active;
+
+ update_curr_rt(rq, now);
+
+ list_del(&p->run_list);
+ if (list_empty(array->queue + p->prio))
+ __clear_bit(p->prio, array->bitmap);
+}
+
+/*
+ * Put task to the end of the run list without the overhead of dequeue
+ * followed by enqueue.
+ */
+static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+{
+ struct rt_prio_array *array = &rq->rt.active;
+
+ list_move_tail(&p->run_list, array->queue + p->prio);
+}
+
+static void
+yield_task_rt(struct rq *rq, struct task_struct *p)
+{
+ requeue_task_rt(rq, p);
+}
+
+/*
+ * Preempt the current task with a newly woken task if needed:
+ */
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+{
+ if (p->prio < rq->curr->prio)
+ resched_task(rq->curr);
+}
+
+static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
+{
+ struct rt_prio_array *array = &rq->rt.active;
+ struct task_struct *next;
+ struct list_head *queue;
+ int idx;
+
+ idx = sched_find_first_bit(array->bitmap);
+ if (idx >= MAX_RT_PRIO)
+ return NULL;
+
+ queue = array->queue + idx;
+ next = list_entry(queue->next, struct task_struct, run_list);
+
+ next->se.exec_start = now;
+
+ return next;
+}
+
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now)
+{
+ update_curr_rt(rq, now);
+ p->se.exec_start = 0;
+}
+
+/*
+ * Load-balancing iterator. Note: while the runqueue stays locked
+ * during the whole iteration, the current task might be
+ * dequeued so the iterator has to be dequeue-safe. Here we
+ * achieve that by always pre-iterating before returning
+ * the current task:
+ */
+static struct task_struct *load_balance_start_rt(void *arg)
+{
+ struct rq *rq = arg;
+ struct rt_prio_array *array = &rq->rt.active;
+ struct list_head *head, *curr;
+ struct task_struct *p;
+ int idx;
+
+ idx = sched_find_first_bit(array->bitmap);
+ if (idx >= MAX_RT_PRIO)
+ return NULL;
+
+ head = array->queue + idx;
+ curr = head->prev;
+
+ p = list_entry(curr, struct task_struct, run_list);
+
+ curr = curr->prev;
+
+ rq->rt.rt_load_balance_idx = idx;
+ rq->rt.rt_load_balance_head = head;
+ rq->rt.rt_load_balance_curr = curr;
+
+ return p;
+}
+
+static struct task_struct *load_balance_next_rt(void *arg)
+{
+ struct rq *rq = arg;
+ struct rt_prio_array *array = &rq->rt.active;
+ struct list_head *head, *curr;
+ struct task_struct *p;
+ int idx;
+
+ idx = rq->rt.rt_load_balance_idx;
+ head = rq->rt.rt_load_balance_head;
+ curr = rq->rt.rt_load_balance_curr;
+
+ /*
+ * If we arrived back to the head again then
+ * iterate to the next queue (if any):
+ */
+ if (unlikely(head == curr)) {
+ int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
+
+ if (next_idx >= MAX_RT_PRIO)
+ return NULL;
+
+ idx = next_idx;
+ head = array->queue + idx;
+ curr = head->prev;
+
+ rq->rt.rt_load_balance_idx = idx;
+ rq->rt.rt_load_balance_head = head;
+ }
+
+ p = list_entry(curr, struct task_struct, run_list);
+
+ curr = curr->prev;
+
+ rq->rt.rt_load_balance_curr = curr;
+
+ return p;
+}
+
+static int
+load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, unsigned long *load_moved)
+{
+ int this_best_prio, best_prio, best_prio_seen = 0;
+ int nr_moved;
+ struct rq_iterator rt_rq_iterator;
+
+ best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
+ this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
+
+ /*
+ * Enable handling of the case where there is more than one task
+ * with the best priority. If the current running task is one
+ * of those with prio==best_prio we know it won't be moved
+ * and therefore it's safe to override the skip (based on load)
+ * of any task we find with that prio.
+ */
+ if (busiest->curr->prio == best_prio)
+ best_prio_seen = 1;
+
+ rt_rq_iterator.start = load_balance_start_rt;
+ rt_rq_iterator.next = load_balance_next_rt;
+ /* pass 'busiest' rq argument into
+ * load_balance_[start|next]_rt iterators
+ */
+ rt_rq_iterator.arg = busiest;
+
+ nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
+ max_load_move, sd, idle, all_pinned, load_moved,
+ this_best_prio, best_prio, best_prio_seen,
+ &rt_rq_iterator);
+
+ return nr_moved;
+}
+
+static void task_tick_rt(struct rq *rq, struct task_struct *p)
+{
+ /*
+ * RR tasks need a special form of timeslice management.
+ * FIFO tasks have no timeslices.
+ */
+ if (p->policy != SCHED_RR)
+ return;
+
+ if (--p->time_slice)
+ return;
+
+ p->time_slice = static_prio_timeslice(p->static_prio);
+ set_tsk_need_resched(p);
+
+ /* put it at the end of the queue: */
+ requeue_task_rt(rq, p);
+}
+
+/*
+ * No parent/child timeslice management necessary for RT tasks,
+ * just activate them:
+ */
+static void task_new_rt(struct rq *rq, struct task_struct *p)
+{
+ activate_task(rq, p, 1);
+}
+
+static struct sched_class rt_sched_class __read_mostly = {
+ .enqueue_task = enqueue_task_rt,
+ .dequeue_task = dequeue_task_rt,
+ .yield_task = yield_task_rt,
+
+ .check_preempt_curr = check_preempt_curr_rt,
+
+ .pick_next_task = pick_next_task_rt,
+ .put_prev_task = put_prev_task_rt,
+
+ .load_balance = load_balance_rt,
+
+ .task_tick = task_tick_rt,
+ .task_new = task_new_rt,
+};
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
new file mode 100644
index 00000000000..c63c38f6fa6
--- /dev/null
+++ b/kernel/sched_stats.h
@@ -0,0 +1,235 @@
+
+#ifdef CONFIG_SCHEDSTATS
+/*
+ * bump this up when changing the output format or the meaning of an existing
+ * format, so that tools can adapt (or abort)
+ */
+#define SCHEDSTAT_VERSION 14
+
+static int show_schedstat(struct seq_file *seq, void *v)
+{
+ int cpu;
+
+ seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
+ seq_printf(seq, "timestamp %lu\n", jiffies);
+ for_each_online_cpu(cpu) {
+ struct rq *rq = cpu_rq(cpu);
+#ifdef CONFIG_SMP
+ struct sched_domain *sd;
+ int dcnt = 0;
+#endif
+
+ /* runqueue-specific stats */
+ seq_printf(seq,
+ "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %llu %llu %lu",
+ cpu, rq->yld_both_empty,
+ rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
+ rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
+ rq->ttwu_cnt, rq->ttwu_local,
+ rq->rq_sched_info.cpu_time,
+ rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
+
+ seq_printf(seq, "\n");
+
+#ifdef CONFIG_SMP
+ /* domain-specific stats */
+ preempt_disable();
+ for_each_domain(cpu, sd) {
+ enum cpu_idle_type itype;
+ char mask_str[NR_CPUS];
+
+ cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
+ seq_printf(seq, "domain%d %s", dcnt++, mask_str);
+ for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
+ itype++) {
+ seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
+ "%lu",
+ sd->lb_cnt[itype],
+ sd->lb_balanced[itype],
+ sd->lb_failed[itype],
+ sd->lb_imbalance[itype],
+ sd->lb_gained[itype],
+ sd->lb_hot_gained[itype],
+ sd->lb_nobusyq[itype],
+ sd->lb_nobusyg[itype]);
+ }
+ seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
+ " %lu %lu %lu\n",
+ sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
+ sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
+ sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
+ sd->ttwu_wake_remote, sd->ttwu_move_affine,
+ sd->ttwu_move_balance);
+ }
+ preempt_enable();
+#endif
+ }
+ return 0;
+}
+
+static int schedstat_open(struct inode *inode, struct file *file)
+{
+ unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
+ char *buf = kmalloc(size, GFP_KERNEL);
+ struct seq_file *m;
+ int res;
+
+ if (!buf)
+ return -ENOMEM;
+ res = single_open(file, show_schedstat, NULL);
+ if (!res) {
+ m = file->private_data;
+ m->buf = buf;
+ m->size = size;
+ } else
+ kfree(buf);
+ return res;
+}
+
+const struct file_operations proc_schedstat_operations = {
+ .open = schedstat_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
+{
+ if (rq) {
+ rq->rq_sched_info.run_delay += delta;
+ rq->rq_sched_info.pcnt++;
+ }
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long long delta)
+{
+ if (rq)
+ rq->rq_sched_info.cpu_time += delta;
+}
+# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
+# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
+#else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
+{}
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long long delta)
+{}
+# define schedstat_inc(rq, field) do { } while (0)
+# define schedstat_add(rq, field, amt) do { } while (0)
+#endif
+
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+/*
+ * Called when a process is dequeued from the active array and given
+ * the cpu. We should note that with the exception of interactive
+ * tasks, the expired queue will become the active queue after the active
+ * queue is empty, without explicitly dequeuing and requeuing tasks in the
+ * expired queue. (Interactive tasks may be requeued directly to the
+ * active queue, thus delaying tasks in the expired queue from running;
+ * see scheduler_tick()).
+ *
+ * This function is only called from sched_info_arrive(), rather than
+ * dequeue_task(). Even though a task may be queued and dequeued multiple
+ * times as it is shuffled about, we're really interested in knowing how
+ * long it was from the *first* time it was queued to the time that it
+ * finally hit a cpu.
+ */
+static inline void sched_info_dequeued(struct task_struct *t)
+{
+ t->sched_info.last_queued = 0;
+}
+
+/*
+ * Called when a task finally hits the cpu. We can now calculate how
+ * long it was waiting to run. We also note when it began so that we
+ * can keep stats on how long its timeslice is.
+ */
+static void sched_info_arrive(struct task_struct *t)
+{
+ unsigned long long now = sched_clock(), delta = 0;
+
+ if (t->sched_info.last_queued)
+ delta = now - t->sched_info.last_queued;
+ sched_info_dequeued(t);
+ t->sched_info.run_delay += delta;
+ t->sched_info.last_arrival = now;
+ t->sched_info.pcnt++;
+
+ rq_sched_info_arrive(task_rq(t), delta);
+}
+
+/*
+ * Called when a process is queued into either the active or expired
+ * array. The time is noted and later used to determine how long we
+ * had to wait for us to reach the cpu. Since the expired queue will
+ * become the active queue after active queue is empty, without dequeuing
+ * and requeuing any tasks, we are interested in queuing to either. It
+ * is unusual but not impossible for tasks to be dequeued and immediately
+ * requeued in the same or another array: this can happen in sched_yield(),
+ * set_user_nice(), and even load_balance() as it moves tasks from runqueue
+ * to runqueue.
+ *
+ * This function is only called from enqueue_task(), but also only updates
+ * the timestamp if it is already not set. It's assumed that
+ * sched_info_dequeued() will clear that stamp when appropriate.
+ */
+static inline void sched_info_queued(struct task_struct *t)
+{
+ if (unlikely(sched_info_on()))
+ if (!t->sched_info.last_queued)
+ t->sched_info.last_queued = sched_clock();
+}
+
+/*
+ * Called when a process ceases being the active-running process, either
+ * voluntarily or involuntarily. Now we can calculate how long we ran.
+ */
+static inline void sched_info_depart(struct task_struct *t)
+{
+ unsigned long long delta = sched_clock() - t->sched_info.last_arrival;
+
+ t->sched_info.cpu_time += delta;
+ rq_sched_info_depart(task_rq(t), delta);
+}
+
+/*
+ * Called when tasks are switched involuntarily due, typically, to expiring
+ * their time slice. (This may also be called when switching to or from
+ * the idle task.) We are only called when prev != next.
+ */
+static inline void
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+ struct rq *rq = task_rq(prev);
+
+ /*
+ * prev now departs the cpu. It's not interesting to record
+ * stats about how efficient we were at scheduling the idle
+ * process, however.
+ */
+ if (prev != rq->idle)
+ sched_info_depart(prev);
+
+ if (next != rq->idle)
+ sched_info_arrive(next);
+}
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+ if (unlikely(sched_info_on()))
+ __sched_info_switch(prev, next);
+}
+#else
+#define sched_info_queued(t) do { } while (0)
+#define sched_info_switch(t, next) do { } while (0)
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
+
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0b9886a00e7..73217a9e287 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -488,7 +488,6 @@ void __init softirq_init(void)
static int ksoftirqd(void * __bind_cpu)
{
- set_user_nice(current, 19);
current->flags |= PF_NOFREEZE;
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 30ee462ee79..51f5dac42a0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -206,7 +206,87 @@ static ctl_table root_table[] = {
{ .ctl_name = 0 }
};
+#ifdef CONFIG_SCHED_DEBUG
+static unsigned long min_sched_granularity_ns = 100000; /* 100 usecs */
+static unsigned long max_sched_granularity_ns = 1000000000; /* 1 second */
+static unsigned long min_wakeup_granularity_ns; /* 0 usecs */
+static unsigned long max_wakeup_granularity_ns = 1000000000; /* 1 second */
+#endif
+
static ctl_table kern_table[] = {
+#ifdef CONFIG_SCHED_DEBUG
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_granularity_ns",
+ .data = &sysctl_sched_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_wakeup_granularity_ns",
+ .data = &sysctl_sched_wakeup_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_wakeup_granularity_ns,
+ .extra2 = &max_wakeup_granularity_ns,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_batch_wakeup_granularity_ns",
+ .data = &sysctl_sched_batch_wakeup_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_wakeup_granularity_ns,
+ .extra2 = &max_wakeup_granularity_ns,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_stat_granularity_ns",
+ .data = &sysctl_sched_stat_granularity,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_wakeup_granularity_ns,
+ .extra2 = &max_wakeup_granularity_ns,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_runtime_limit_ns",
+ .data = &sysctl_sched_runtime_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_sched_granularity_ns,
+ .extra2 = &max_sched_granularity_ns,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_child_runs_first",
+ .data = &sysctl_sched_child_runs_first,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_features",
+ .data = &sysctl_sched_features,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
{
.ctl_name = KERN_PANIC,
.procname = "panic",
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index da95e10cfd7..fab32a28637 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -105,6 +105,15 @@ config DETECT_SOFTLOCKUP
can be detected via the NMI-watchdog, on platforms that
support it.)
+config SCHED_DEBUG
+ bool "Collect scheduler debugging info"
+ depends on DEBUG_KERNEL && PROC_FS
+ default y
+ help
+ If you say Y here, the /proc/sched_debug file will be provided
+ that can help debug the scheduler. The runtime overhead of this
+ option is minimal.
+
config SCHEDSTATS
bool "Collect scheduler statistics"
depends on DEBUG_KERNEL && PROC_FS