From c225aa57ff4ffe715df4692676b77c815a337236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Holm=20Th=C3=B8gersen?= Date: Sun, 11 Jan 2009 22:34:01 -0500 Subject: ext4: fix wrong use of do_div MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the following warning: fs/jbd2/journal.c: In function ‘jbd2_seq_info_show’: fs/jbd2/journal.c:850: warning: format ‘%lu’ expects type ‘long unsigned int’, but argument 3 has type ‘uint32_t’ is caused by wrong usage of do_div that modifies the dividend in-place and returns the quotient. So not only would an incorrect value be displayed, but s->journal->j_average_commit_time would also be changed to a wrong value! Fix it by using div_u64 instead. Signed-off-by: Simon Holm Thøgersen Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 56675306ed8..eb343008ede 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -37,10 +37,10 @@ #include #include #include +#include #include #include -#include EXPORT_SYMBOL(jbd2_journal_start); EXPORT_SYMBOL(jbd2_journal_restart); @@ -846,8 +846,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); seq_printf(seq, " %ums logging transaction\n", jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); - seq_printf(seq, " %luus average transaction commit time\n", - do_div(s->journal->j_average_commit_time, 1000)); + seq_printf(seq, " %lluus average transaction commit time\n", + div_u64(s->journal->j_average_commit_time, 1000)); seq_printf(seq, " %lu handles per transaction\n", s->stats->u.run.rs_handle_count / s->stats->ts_tid); seq_printf(seq, " %lu blocks per transaction\n", -- cgit v1.2.3 From 06a279d636734da32bb62dd2f7b0ade666f65d7c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 17 Jan 2009 18:41:37 -0500 Subject: ext4: only use i_size_high for regular files Directories are not allowed to be bigger than 2GB, so don't use i_size_high for anything other than regular files. E2fsck should complain about these inodes, but the simplest thing to do for the kernel is to only use i_size_high for regular files. This prevents an intentially corrupted filesystem from causing the kernel to burn a huge amount of CPU and issuing error messages such as: EXT4-fs warning (device loop0): ext4_block_to_path: block 135090028 > max Thanks to David Maciejak from Fortinet's FortiGuard Global Security Research Team for reporting this issue. http://bugzilla.kernel.org/show_bug.cgi?id=12375 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ext4.h | 7 +++++-- fs/ext4/inode.c | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c668e4377d7..aafc9eba1c2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1206,8 +1206,11 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, static inline loff_t ext4_isize(struct ext4_inode *raw_inode) { - return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | - le32_to_cpu(raw_inode->i_size_lo); + if (S_ISREG(le16_to_cpu(raw_inode->i_mode))) + return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | + le32_to_cpu(raw_inode->i_size_lo); + else + return (loff_t) le32_to_cpu(raw_inode->i_size_lo); } static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a6444cee0c7..49484ba801c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -360,9 +360,9 @@ static int ext4_block_to_path(struct inode *inode, final = ptrs; } else { ext4_warning(inode->i_sb, "ext4_block_to_path", - "block %lu > max", + "block %lu > max in inode %lu", i_block + direct_blocks + - indirect_blocks + double_blocks); + indirect_blocks + double_blocks, inode->i_ino); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); -- cgit v1.2.3 From e6b8bc09ba2075cd91fbffefcd2778b1a00bd76f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 16 Jan 2009 11:13:40 -0500 Subject: ext4: Add sanity check to make_indexed_dir Make sure the rec_len field in the '..' entry is sane, lest we overrun the directory block and cause a kernel oops on a purposefully corrupted filesystem. Thanks to Sami Liedes for reporting this bug. http://bugzilla.kernel.org/show_bug.cgi?id=12430 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/namei.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index fec0b4c2f5f..ba702bd7910 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1368,7 +1368,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; blocksize = dir->i_sb->s_blocksize; - dxtrace(printk(KERN_DEBUG "Creating index\n")); + dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); retval = ext4_journal_get_write_access(handle, bh); if (retval) { ext4_std_error(dir->i_sb, retval); @@ -1377,6 +1377,20 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, } root = (struct dx_root *) bh->b_data; + /* The 0th block becomes the root, move the dirents out */ + fde = &root->dotdot; + de = (struct ext4_dir_entry_2 *)((char *)fde + + ext4_rec_len_from_disk(fde->rec_len)); + if ((char *) de >= (((char *) root) + blocksize)) { + ext4_error(dir->i_sb, __func__, + "invalid rec_len for '..' in inode %lu", + dir->i_ino); + brelse(bh); + return -EIO; + } + len = ((char *) root) + blocksize - (char *) de; + + /* Allocate new block for the 0th block's dirents */ bh2 = ext4_append(handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); @@ -1385,11 +1399,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; data1 = bh2->b_data; - /* The 0th block becomes the root, move the dirents out */ - fde = &root->dotdot; - de = (struct ext4_dir_entry_2 *)((char *)fde + - ext4_rec_len_from_disk(fde->rec_len)); - len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); de = (struct ext4_dir_entry_2 *) data1; top = data1 + len; -- cgit v1.2.3 From a21102b55c4f8dfd3adb4a15a34cd62237b46039 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 16 Jan 2009 11:13:47 -0500 Subject: ext3: Add sanity check to make_indexed_dir Make sure the rec_len field in the '..' entry is sane, lest we overrun the directory block and cause a kernel oops on a purposefully corrupted filesystem. This fixes a bug related to a bug originally reported by Sami Liedes for ext4 at: http://bugzilla.kernel.org/show_bug.cgi?id=12430 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext3/namei.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 69a3d19ca9f..4db4ffa1eda 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1358,7 +1358,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; blocksize = dir->i_sb->s_blocksize; - dxtrace(printk("Creating index\n")); + dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); retval = ext3_journal_get_write_access(handle, bh); if (retval) { ext3_std_error(dir->i_sb, retval); @@ -1367,6 +1367,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, } root = (struct dx_root *) bh->b_data; + /* The 0th block becomes the root, move the dirents out */ + fde = &root->dotdot; + de = (struct ext3_dir_entry_2 *)((char *)fde + + ext3_rec_len_from_disk(fde->rec_len)); + if ((char *) de >= (((char *) root) + blocksize)) { + ext3_error(dir->i_sb, __func__, + "invalid rec_len for '..' in inode %lu", + dir->i_ino); + brelse(bh); + return -EIO; + } + len = ((char *) root) + blocksize - (char *) de; + bh2 = ext3_append (handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); @@ -1375,11 +1388,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; data1 = bh2->b_data; - /* The 0th block becomes the root, move the dirents out */ - fde = &root->dotdot; - de = (struct ext3_dir_entry_2 *)((char *)fde + - ext3_rec_len_from_disk(fde->rec_len)); - len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); de = (struct ext3_dir_entry_2 *) data1; top = data1 + len; -- cgit v1.2.3 From 08ec8c3878cea0bf91f2ba3c0badf44b383752d0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 16 Jan 2009 11:57:00 -0500 Subject: jbd2: On a __journal_expect() assertion failure printk "JBD2", not "EXT3-fs" Otherwise it can be very confusing to find a "EXT3-fs: " failure in the middle of EXT4-fs failures, and it makes it harder to track the source of the failure. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b45109c61fb..b28b37eb11c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -308,7 +308,8 @@ void buffer_assertion_failure(struct buffer_head *bh); int val = (expr); \ if (!val) { \ printk(KERN_ERR \ - "EXT3-fs unexpected failure: %s;\n",# expr); \ + "JBD2 unexpected failure: %s: %s;\n", \ + __func__, #expr); \ printk(KERN_ERR why "\n"); \ } \ val; \ -- cgit v1.2.3 From e7f07968c16bdd9480001c0a9de013ba56889cf9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 20 Jan 2009 09:50:19 -0500 Subject: ext4: Fix ext4_free_blocks() w/o a journal when files have indirect blocks When trying to unlink a file with indirect blocks on a filesystem without a journal, the "circular indirect block" sanity test was getting falsely triggered. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 49484ba801c..b4386dafeb0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3622,7 +3622,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, * block pointed to itself, it would have been detached when * the block was cleared. Check for this instead of OOPSing. */ - if (bh2jh(this_bh)) + if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) ext4_handle_dirty_metadata(handle, inode, this_bh); else ext4_error(inode->i_sb, __func__, -- cgit v1.2.3 From fdff73f094e7220602cc3f8959c7230517976412 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 26 Jan 2009 19:06:41 -0500 Subject: ext4: Initialize the new group descriptor when resizing the filesystem Make sure all of the fields of the group descriptor are properly initialized. Previously, we allowed bg_flags field to be contain random garbage, which could trigger non-deterministic behavior, including a kernel OOPS. http://bugzilla.kernel.org/show_bug.cgi?id=12433 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/resize.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c328be5d688..c06886abd65 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -861,12 +861,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) gdp = (struct ext4_group_desc *)((char *)primary->b_data + gdb_off * EXT4_DESC_SIZE(sb)); + memset(gdp, 0, EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ ext4_free_blks_set(sb, gdp, input->free_blocks_count); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); - gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); + gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); /* -- cgit v1.2.3 From 9fd9784c91db79e953ea3fe3741f885bdc390a72 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 26 Jan 2009 19:26:26 -0500 Subject: ext4: Fix building with EXT4FS_DEBUG When bg_free_blocks_count was renamed to bg_free_blocks_count_lo in 560671a0, its uses under EXT4FS_DEBUG were not changed to the helper ext4_free_blks_count. Another commit, 498e5f24, also did not change everything needed under EXT4FS_DEBUG, thus making it spill some warnings related to printing format. This commit fixes both issues and makes ext4 build again when EXT4FS_DEBUG is enabled. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 6 +++--- fs/ext4/extents.c | 2 +- fs/ext4/mballoc.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 6bba06b09dd..9a50b8052dc 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -684,15 +684,15 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + desc_count += ext4_free_blks_count(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_block_bitmap(sb, i); if (bitmap_bh == NULL) continue; x = ext4_count_free(bitmap_bh, sb->s_blocksize); - printk(KERN_DEBUG "group %lu: stored = %d, counted = %u\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); + printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", + i, ext4_free_blks_count(sb, gdp), x); bitmap_count += x; } brelse(bitmap_bh); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 54bf0623a9a..e2eab196875 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3048,7 +3048,7 @@ retry: WARN_ON(ret <= 0); printk(KERN_ERR "%s: ext4_ext_get_blocks " "returned error inode#%lu, block=%u, " - "max_blocks=%lu", __func__, + "max_blocks=%u", __func__, inode->i_ino, block, max_blocks); #endif ext4_mark_inode_dirty(handle, inode); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 918aec0c8a1..deba54f6cbe 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3025,7 +3025,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, goto out_err; ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, - gdp->bg_free_blocks_count); + ext4_free_blks_count(sb, gdp)); err = ext4_journal_get_write_access(handle, gdp_bh); if (err) -- cgit v1.2.3 From 8527bec548e01a29c6d1928d20d6d3be71861482 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Mon, 26 Jan 2009 21:00:33 -0800 Subject: virtio_net: use correct accessors for scatterlists Without this fix, virtio_net makes incorrect usage of scatterlists. It sets the end of the scatterlist chain after the first element, despite the fact that more entries come after it. If you try to run dma_map_sg() on one of the scatterlists given to you by add_buf(), you will get a null pointer oops. Signed-off-by: Ira W. Snyder Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 63ef2a8905f..c68808336c8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -287,7 +287,7 @@ static void try_fill_recv_maxbufs(struct virtnet_info *vi) skb_put(skb, MAX_PACKET_LEN); hdr = skb_vnet_hdr(skb); - sg_init_one(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, hdr, sizeof(*hdr)); if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { @@ -488,9 +488,9 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) /* Encode metadata header at front. */ if (vi->mergeable_rx_bufs) - sg_init_one(sg, mhdr, sizeof(*mhdr)); + sg_set_buf(sg, mhdr, sizeof(*mhdr)); else - sg_init_one(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, hdr, sizeof(*hdr)); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; -- cgit v1.2.3 From 98322f22eca889478045cf896b572250d03dc45f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 26 Jan 2009 21:35:35 -0800 Subject: udp: optimize bind(0) if many ports are in use commit 9088c5609584684149f3fb5b065aa7f18dcb03ff (udp: Improve port randomization) introduced a regression for UDP bind() syscall to null port (getting a random port) in case lot of ports are already in use. This is because we do about 28000 scans of very long chains (220 sockets per chain), with many spin_lock_bh()/spin_unlock_bh() calls. Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE) so that we scan chains at most once. Instead of 250 ms per bind() call, we get after patch a time of 2.9 ms Based on a report from Vitaly Mayatskikh Reported-by: Vitaly Mayatskikh Signed-off-by: Eric Dumazet Tested-by: Vitaly Mayatskikh Signed-off-by: David S. Miller --- net/ipv4/udp.c | 55 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf5ab0581eb..b7faffe5c02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, + unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && - sk2->sk_hash == num && + (bitmap || sk2->sk_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) - return 1; + (*saddr_comp)(sk, sk2)) { + if (bitmap) + __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, + bitmap); + else + return 1; + } return 0; } @@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (!snum) { int low, high, remaining; unsigned rand; - unsigned short first; + unsigned short first, last; + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rand = net_random(); - snum = first = rand % remaining + low; - rand |= 1; - for (;;) { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + first = (((u64)rand * remaining) >> 32) + low; + /* + * force rand to be an odd multiple of UDP_HTABLE_SIZE + */ + rand = (rand | 1) * UDP_HTABLE_SIZE; + for (last = first + UDP_HTABLE_SIZE; first != last; first++) { + hslot = &udptable->hash[udp_hashfn(net, first)]; + bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); - if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) - break; - spin_unlock_bh(&hslot->lock); + udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, + saddr_comp); + + snum = first; + /* + * Iterate on all possible values of snum for this hash. + * Using steps of an odd multiple of UDP_HTABLE_SIZE + * give us randomization and full range coverage. + */ do { - snum = snum + rand; - } while (snum < low || snum > high); - if (snum == first) - goto fail; + if (low <= snum && snum <= high && + !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + goto found; + snum += rand; + } while (snum != first); + spin_unlock_bh(&hslot->lock); } + goto fail; } else { hslot = &udptable->hash[udp_hashfn(net, snum)]; spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) goto fail_unlock; } +found: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { -- cgit v1.2.3 From a7a41acf99d9150b424839b0d7b4f5ad9d211e2d Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Mon, 26 Jan 2009 21:54:21 -0800 Subject: r6040: Remove unused variable pdev from drivers/net/r6040.c drivers/net/r6040.c:441: warning: unused variable 'pdev' Signed-off-by: Manish Katiyar Signed-off-by: David S. Miller --- drivers/net/r6040.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 72fd9e97c19..b2dcdb5ed8b 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -438,7 +438,6 @@ static void r6040_down(struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - struct pci_dev *pdev = lp->pdev; int limit = 2048; u16 *adrp; u16 cmd; -- cgit v1.2.3 From 9fa5fdf291c9b58b1cb8b4bb2a0ee57efa21d635 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Mon, 26 Jan 2009 22:15:31 -0800 Subject: tcp: Fix length tcp_splice_data_recv passes to skb_splice_bits. tcp_splice_data_recv has two lengths to consider: the len parameter it gets from tcp_read_sock, which specifies the amount of data in the skb, and rd_desc->count, which is the amount of data the splice caller still wants. Currently it passes just the latter to skb_splice_bits, which then splices min(rd_desc->count, skb->len - offset) bytes. Most of the time this is fine, except when the skb contains urgent data. In that case len goes only up to the urgent byte and is less than skb->len - offset. By ignoring len tcp_splice_data_recv may a) splice data tcp_read_sock told it not to, b) return to tcp_read_sock a value > len. Now, tcp_read_sock doesn't handle used > len and leaves the socket in a bad state (both sk_receive_queue and copied_seq are bad at that point) resulting in duplicated data and corruption. Fix by passing min(rd_desc->count, len) to skb_splice_bits. Signed-off-by: Dimitris Michailidis Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0cd71b84e48..76b148bcb0d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -524,7 +524,8 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, struct tcp_splice_state *tss = rd_desc->arg.data; int ret; - ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags); + ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len), + tss->flags); if (ret > 0) rd_desc->count -= ret; return ret; -- cgit v1.2.3 From 15b2bee22a0390d951301b53e83df88d0350c499 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 27 Jan 2009 16:41:58 -0800 Subject: e1000: fix bug with shared interrupt during reset A nasty bug was found where an MTU change (or anything else that caused a reset) could race with the interrupt code. The interrupt code was entered by a shared interrupt during the MTU change. This change prevents the interrupt code from running while the driver is in the middle of its reset path. Signed-off-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/e1000/e1000_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 26474c92193..c986978ce76 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -31,7 +31,7 @@ char e1000_driver_name[] = "e1000"; static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; -#define DRV_VERSION "7.3.20-k3-NAPI" +#define DRV_VERSION "7.3.21-k3-NAPI" const char e1000_driver_version[] = DRV_VERSION; static const char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation."; @@ -3712,7 +3712,7 @@ static irqreturn_t e1000_intr(int irq, void *data) struct e1000_hw *hw = &adapter->hw; u32 rctl, icr = er32(ICR); - if (unlikely(!icr)) + if (unlikely((!icr) || test_bit(__E1000_RESETTING, &adapter->flags))) return IRQ_NONE; /* Not our interrupt */ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is -- cgit v1.2.3 From 94cd3e6cbebf85903b4d53ed2147bdb4c6e08625 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Jan 2009 17:45:10 -0800 Subject: net: wrong test in inet_ehash_locks_alloc() In commit 9db66bdcc83749affe61c61eb8ff3cf08f42afec (net: convert TCP/DCCP ehash rwlocks to spinlocks), I forgot to change one occurrence of rwlock_t to spinlock_t I believe sizeof(raw_spinlock_t) might be > 0 on !CONFIG_SMP if CONFIG_DEBUG_SPINLOCK while sizeof(raw_rwlock_t) should be 0 in this case. Fortunatly, CONFIG_DEBUG_SPINLOCK adds fields to both spinlock_t and rwlock_t, but at this might change in the future (being able to debug spinlocks but not rwlocks for example), better to be safe. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f44bb5c77a7..d0a043153cc 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -182,7 +182,7 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) size = 2048; if (nr_pcpus >= 32) size = 4096; - if (sizeof(rwlock_t) != 0) { + if (sizeof(spinlock_t) != 0) { #ifdef CONFIG_NUMA if (size * sizeof(spinlock_t) > PAGE_SIZE) hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); -- cgit v1.2.3 From 6c06a478c9e59d1584a5dc1b2b3519bae5d6546a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 27 Jan 2009 22:30:19 -0800 Subject: net: fix xfrm reverse flow lookup for icmp6 This patch fixes the xfrm reverse flow lookup for icmp6 so that icmp6 packets don't get lost over ipsec tunnels. Similar patch is in RHEL5 kernel for a quite long time and I do not see why it isn't in mainline. Signed-off-by: Jiri Pirko Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4f433847d95..36dff880718 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -443,10 +443,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) goto relookup_failed; - if (ip6_dst_lookup(sk, &dst2, &fl)) + if (ip6_dst_lookup(sk, &dst2, &fl2)) goto relookup_failed; - err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); -- cgit v1.2.3 From 1d6e55f195128813f96458203a9fa14204f9251e Mon Sep 17 00:00:00 2001 From: Thomas Goff Date: Tue, 27 Jan 2009 22:39:59 -0800 Subject: IPv6: Fix multicast routing bugs. This patch addresses the IPv6 multicast routing issues described below. It was tested with XORP 1.4/1.5 as the IPv6 PIM-SM routing daemon against FreeBSD peers. net/ipv6/ip6_input.c: - Don't try to forward link-local multicast packets. - Don't reset skb2->dev before calling ip6_mr_input() so packets can be identified as coming from the PIM register vif properly. net/ipv6/ip6mr.c: - Fix incoming PIM register messages processing: * The IPv6 pseudo-header should be included when checksumming PIM messages (RFC 4601 section 4.9; RFC 3973 section 4.7.1). * Packets decapsulated from PIM register messages should have skb->protocol ETH_P_IPV6. - Enable/disable IPv6 multicast forwarding on the corresponding interface when a routing daemon adds/removes a multicast virtual interface. - Remove incorrect skb_pull() to fix userspace signaling. - Enable/disable global IPv6 multicast forwarding when an IPv6 multicast routing socket is opened/closed. net/ipv6/route.c: - Don't use strict routing logic for packets decapsulated from PIM register messages (similar to disabling rp_filter for the IPv4 case). Signed-off-by: Thomas Goff Reviewed-by: Fred Templin Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 2 +- net/ipv6/ip6mr.c | 23 ++++++++++++++++++----- net/ipv6/route.c | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 936f48946e2..f171e8dbac9 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -255,6 +255,7 @@ int ip6_mc_input(struct sk_buff *skb) * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate @@ -316,7 +317,6 @@ int ip6_mc_input(struct sk_buff *skb) } if (skb2) { - skb2->dev = skb2->dst->dev; ip6_mr_input(skb2); } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 3c51b2d827f..d19a84b7950 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -365,7 +365,9 @@ static int pim6_rcv(struct sk_buff *skb) pim = (struct pimreghdr *)skb_transport_header(skb); if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || - (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && + (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + sizeof(*pim), IPPROTO_PIM, + csum_partial((void *)pim, sizeof(*pim), 0)) && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; @@ -392,7 +394,7 @@ static int pim6_rcv(struct sk_buff *skb) skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->dev = reg_dev; - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); @@ -481,6 +483,7 @@ static int mif6_delete(struct net *net, int vifi) { struct mif_device *v; struct net_device *dev; + struct inet6_dev *in6_dev; if (vifi < 0 || vifi >= net->ipv6.maxvif) return -EADDRNOTAVAIL; @@ -513,6 +516,10 @@ static int mif6_delete(struct net *net, int vifi) dev_set_allmulti(dev, -1); + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding--; + if (v->flags & MIFF_REGISTER) unregister_netdevice(dev); @@ -622,6 +629,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) int vifi = vifc->mif6c_mifi; struct mif_device *v = &net->ipv6.vif6_table[vifi]; struct net_device *dev; + struct inet6_dev *in6_dev; int err; /* Is vif busy ? */ @@ -662,6 +670,10 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) return -EINVAL; } + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding++; + /* * Fill in the VIF structures */ @@ -838,8 +850,6 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, skb->dst = dst_clone(pkt->dst); skb->ip_summed = CHECKSUM_UNNECESSARY; - - skb_pull(skb, sizeof(struct ipv6hdr)); } if (net->ipv6.mroute6_sk == NULL) { @@ -1222,8 +1232,10 @@ static int ip6mr_sk_init(struct sock *sk) rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(net->ipv6.mroute6_sk == NULL)) + if (likely(net->ipv6.mroute6_sk == NULL)) { net->ipv6.mroute6_sk = sk; + net->ipv6.devconf_all->mc_forwarding++; + } else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1242,6 +1254,7 @@ int ip6mr_sk_done(struct sock *sk) if (sk == net->ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); net->ipv6.mroute6_sk = NULL; + net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); mroute_clean_tables(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c4a59824ac2..9c574235c90 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -794,7 +794,7 @@ void ip6_route_input(struct sk_buff *skb) .proto = iph->nexthdr, }; - if (rt6_need_strict(&iph->daddr)) + if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); -- cgit v1.2.3 From a4e6db07984529847c6ad8bc616485e721dcb809 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Jan 2009 22:41:03 -0800 Subject: ipv6: Make mc_forwarding sysctl read-only. The kernel manages this value internally, as necessary, as VIFs are added/removed and as multicast routers are registered and deregistered. Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e92ad8455c6..f9afb452249 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4250,7 +4250,7 @@ static struct addrconf_sysctl_table .procname = "mc_forwarding", .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = proc_dointvec, }, #endif -- cgit v1.2.3 From e6a271651e9e7810c1802bb8375967f6efa4baea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 26 Jan 2009 19:35:28 +0100 Subject: mac80211: remove Michael Wu as maintainer His email address keeps bouncing, and he's not interested in mac80211 patches etc. anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d992d407197..474ec0c5327 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2836,8 +2836,6 @@ S: Maintained MAC80211 P: Johannes Berg M: johannes@sipsolutions.net -P: Michael Wu -M: flamingice@sourmilk.net L: linux-wireless@vger.kernel.org W: http://linuxwireless.org/ T: git kernel.org:/pub/scm/linux/kernel/git/linville/wireless-2.6.git -- cgit v1.2.3 From eb83bbf57429ab80f49b413e3e44d3b19c3fdc5a Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 27 Jan 2009 12:31:23 -0600 Subject: rtl8187: Fix error in setting OFDM power settings for RTL8187L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After reports of poor performance, a review of the latest vendor driver (rtl8187_linux_26.1025.0328.2007) for RTL8187L devices was undertaken. A difference was found in the code used to index the OFDM power tables. When the Linux driver was changed, my unit works at a much greater range than before. I think this fixes Bugzilla #12380 and has been tested by at least two other users. Signed-off-by: Larry Finger Tested-by: Martín Ernesto Barreyro Cc: Stable Signed-off-by: John W. Linville --- drivers/net/wireless/rtl818x/rtl8187_rtl8225.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtl818x/rtl8187_rtl8225.c b/drivers/net/wireless/rtl818x/rtl8187_rtl8225.c index 4e75e8e7fa9..78df281b297 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_rtl8225.c +++ b/drivers/net/wireless/rtl818x/rtl8187_rtl8225.c @@ -285,7 +285,10 @@ static void rtl8225_rf_set_tx_power(struct ieee80211_hw *dev, int channel) ofdm_power = priv->channels[channel - 1].hw_value >> 4; cck_power = min(cck_power, (u8)11); - ofdm_power = min(ofdm_power, (u8)35); + if (ofdm_power > (u8)15) + ofdm_power = 25; + else + ofdm_power += 10; rtl818x_iowrite8(priv, &priv->map->TX_GAIN_CCK, rtl8225_tx_gain_cck_ofdm[cck_power / 6] >> 1); @@ -536,7 +539,10 @@ static void rtl8225z2_rf_set_tx_power(struct ieee80211_hw *dev, int channel) cck_power += priv->txpwr_base & 0xF; cck_power = min(cck_power, (u8)35); - ofdm_power = min(ofdm_power, (u8)15); + if (ofdm_power > (u8)15) + ofdm_power = 25; + else + ofdm_power += 10; ofdm_power += priv->txpwr_base >> 4; ofdm_power = min(ofdm_power, (u8)35); -- cgit v1.2.3 From 1f304e4e3bb161163d9f5bc3c6467a2a6fa9b3ae Mon Sep 17 00:00:00 2001 From: "Zhu, Yi" Date: Fri, 23 Jan 2009 13:45:22 -0800 Subject: iwlwifi: fix kernel oops when ucode DMA memory allocation failure The patch fixes memcpy to NULL address when the ucode DMA allocation failure. This is a fix to bug http://www.intellinuxwireless.org/bugzilla/show_bug.cgi?id=1861 Signed-off-by: Zhu Yi Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 0dc8eed1640..b35c8813bef 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -1719,6 +1719,10 @@ static int iwl_read_ucode(struct iwl_priv *priv) priv->ucode_data_backup.len = data_size; iwl_alloc_fw_desc(priv->pci_dev, &priv->ucode_data_backup); + if (!priv->ucode_code.v_addr || !priv->ucode_data.v_addr || + !priv->ucode_data_backup.v_addr) + goto err_pci_alloc; + /* Initialization instructions and data */ if (init_size && init_data_size) { priv->ucode_init.len = init_size; -- cgit v1.2.3 From 615aab4b75dfa77b00c372330d6f70edd2458bf9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:46 -0800 Subject: cfg80211: Fix sanity check on 5 GHz when processing country IE This fixes two issues with the sanity check loop when processing the country IE: 1. Do not use frequency for the current subband channel check, this was a big fat typo. 2. Apply the 5 GHz 4-channel steps when considering max channel on each subband as was done with a recent patch. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index bc494cef210..61698095e1a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -498,6 +498,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( * calculate the number of reg rules we will need. We will need one * for each channel subband */ while (country_ie_len >= 3) { + int end_channel = 0; struct ieee80211_country_ie_triplet *triplet = (struct ieee80211_country_ie_triplet *) country_ie; int cur_sub_max_channel = 0, cur_channel = 0; @@ -509,9 +510,25 @@ static struct ieee80211_regdomain *country_ie_2_rd( continue; } + /* 2 GHz */ + if (triplet->chans.first_channel <= 14) + end_channel = triplet->chans.first_channel + + triplet->chans.num_channels; + else + /* + * 5 GHz -- For example in country IEs if the first + * channel given is 36 and the number of channels is 4 + * then the individual channel numbers defined for the + * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 + * and not 36, 37, 38, 39. + * + * See: http://tinyurl.com/11d-clarification + */ + end_channel = triplet->chans.first_channel + + (4 * (triplet->chans.num_channels - 1)); + cur_channel = triplet->chans.first_channel; - cur_sub_max_channel = ieee80211_channel_to_frequency( - cur_channel + triplet->chans.num_channels); + cur_sub_max_channel = end_channel; /* Basic sanity check */ if (cur_sub_max_channel < cur_channel) @@ -590,15 +607,6 @@ static struct ieee80211_regdomain *country_ie_2_rd( end_channel = triplet->chans.first_channel + triplet->chans.num_channels; else - /* - * 5 GHz -- For example in country IEs if the first - * channel given is 36 and the number of channels is 4 - * then the individual channel numbers defined for the - * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 - * and not 36, 37, 38, 39. - * - * See: http://tinyurl.com/11d-clarification - */ end_channel = triplet->chans.first_channel + (4 * (triplet->chans.num_channels - 1)); -- cgit v1.2.3 From 667ecd010d870f861a9e276aaaca8cb443ded8b3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:43 -0800 Subject: cfg80211: print correct intersected regulatory domain When CONFIG_CFG80211_REG_DEBUG is enabled and an intersection occurs we are printing the regulatory domain passed by CRDA and indicating its the intersected regulatory domain. Lets fix this and print the intersection as originally intended. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 61698095e1a..85c9034c59b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1284,7 +1284,7 @@ static void reg_country_ie_process_debug( if (intersected_rd) { printk(KERN_DEBUG "cfg80211: We intersect both of these " "and get:\n"); - print_regdomain_info(rd); + print_regdomain_info(intersected_rd); return; } printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); -- cgit v1.2.3 From be0093705c3ce98d73cac0ca8f93ea105cdf4e9c Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 22 Jan 2009 08:44:16 -0500 Subject: ath5k: fix locking in ath5k_config ath5k_config updates the software context without taking sc->lock. Changes-licensed-under: 3-Clause-BSD Signed-off-by: Bob Copeland Acked-by: Nick Kossifidis Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/base.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 8ef87356e08..a533ed60bb4 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -1028,6 +1028,8 @@ ath5k_setup_bands(struct ieee80211_hw *hw) * it's done by reseting the chip. To accomplish this we must * first cleanup any pending DMA, then restart stuff after a la * ath5k_init. + * + * Called with sc->lock. */ static int ath5k_chan_set(struct ath5k_softc *sc, struct ieee80211_channel *chan) @@ -2814,11 +2816,17 @@ ath5k_config(struct ieee80211_hw *hw, u32 changed) { struct ath5k_softc *sc = hw->priv; struct ieee80211_conf *conf = &hw->conf; + int ret; + + mutex_lock(&sc->lock); sc->bintval = conf->beacon_int; sc->power_level = conf->power_level; - return ath5k_chan_set(sc, conf->channel); + ret = ath5k_chan_set(sc, conf->channel); + + mutex_unlock(&sc->lock); + return ret; } static int -- cgit v1.2.3 From e125646ab56b490d0390b158e0afa9cccfc1f897 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Thu, 29 Jan 2009 16:05:19 -0800 Subject: netxen: revert jumbo ringsize Reducing jumbo ring size below 1024 reduces throughput for old firmwares (3.4.216 and older) running on older (NX2031) chip, so restore it back to 1024. This was reduced in commit 32ec803348b4d5f1353e1d7feae30880b8b3e342 ("netxen: reduce memory footprint"). Raising jumbo ring size from 512 to 1024, adds ~4MB per port, but there's still big saving because of original patch (~20MB per port). Signed-off-by: Dhananjay Phadke Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index a75a31005fd..9c78c963b72 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -210,7 +210,7 @@ #define MAX_CMD_DESCRIPTORS_HOST 1024 #define MAX_RCV_DESCRIPTORS_1G 2048 #define MAX_RCV_DESCRIPTORS_10G 4096 -#define MAX_JUMBO_RCV_DESCRIPTORS 512 +#define MAX_JUMBO_RCV_DESCRIPTORS 1024 #define MAX_LRO_RCV_DESCRIPTORS 8 #define MAX_RCVSTATUS_DESCRIPTORS MAX_RCV_DESCRIPTORS #define MAX_JUMBO_RCV_DESC MAX_JUMBO_RCV_DESCRIPTORS -- cgit v1.2.3 From 95e3b24cfb4ec0479d2c42f7a1780d68063a542a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jan 2009 16:07:52 -0800 Subject: net: Fix frag_list handling in skb_seq_read The frag_list handling was broken in skb_seq_read: 1) We didn't add the stepped offset when looking at the head are of fragments other than the first. 2) We didn't take the stepped offset away when setting the data pointer in the head area. 3) The frag index wasn't reset. This patch fixes both issues. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2e5f2ca3bdc..f23fd43539e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2212,10 +2212,10 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, return 0; next_skb: - block_limit = skb_headlen(st->cur_skb); + block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; if (abs_offset < block_limit) { - *data = st->cur_skb->data + abs_offset; + *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } @@ -2257,6 +2257,7 @@ next_skb: } else if (st->root_skb == st->cur_skb && skb_shinfo(st->root_skb)->frag_list) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; + st->frag_idx = 0; goto next_skb; } -- cgit v1.2.3 From 71b3346d182355f19509fadb8fe45114a35cc499 Mon Sep 17 00:00:00 2001 From: Shyam Iyer Date: Thu, 29 Jan 2009 16:12:42 -0800 Subject: net: Fix OOPS in skb_seq_read(). It oopsd for me in skb_seq_read. addr2line said it was linux-2.6/net/core/skbuff.c:2228, which is this line: while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { I added some printks in there and it looks like we hit this: } else if (st->root_skb == st->cur_skb && skb_shinfo(st->root_skb)->frag_list) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; } Actually I did some testing and added a few printks and found that the st->cur_skb->data was 0 and hence the ptr used by iscsi_tcp was null. This caused the kernel panic. if (abs_offset < block_limit) { - *data = st->cur_skb->data + abs_offset; + *data = st->cur_skb->data + (abs_offset - st->stepped_offset); I enabled the debug_tcp and with a few printks found that the code did not go to the next_skb label and could find that the sequence being followed was this - It hit this if condition - if (st->cur_skb->next) { st->cur_skb = st->cur_skb->next; st->frag_idx = 0; goto next_skb; And so, now the st pointer is shifted to the next skb whereas actually it should have hit the second else if first since the data is in the frag_list. else if (st->root_skb == st->cur_skb && skb_shinfo(st->root_skb)->frag_list) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; goto next_skb; } Reversing the two conditions the attached patch fixes the issue for me on top of Herbert's patches. Signed-off-by: Shyam Iyer Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f23fd43539e..da74b844f4e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2250,13 +2250,13 @@ next_skb: st->frag_data = NULL; } - if (st->cur_skb->next) { - st->cur_skb = st->cur_skb->next; + if (st->root_skb == st->cur_skb && + skb_shinfo(st->root_skb)->frag_list) { + st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; - } else if (st->root_skb == st->cur_skb && - skb_shinfo(st->root_skb)->frag_list) { - st->cur_skb = skb_shinfo(st->root_skb)->frag_list; + } else if (st->cur_skb->next) { + st->cur_skb = st->cur_skb->next; st->frag_idx = 0; goto next_skb; } -- cgit v1.2.3 From 58092d1e0a896eb1d9163d58f93df7ed704fa8e1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 29 Jan 2009 16:16:31 -0800 Subject: net: update documentation ip aliases This documentation is old. Add a short note to describe why aliases are no long necessary, and remove the old contact/edit info. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- Documentation/networking/alias.txt | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/Documentation/networking/alias.txt b/Documentation/networking/alias.txt index cd12c2ff518..85046f53fcf 100644 --- a/Documentation/networking/alias.txt +++ b/Documentation/networking/alias.txt @@ -2,13 +2,13 @@ IP-Aliasing: ============ -IP-aliases are additional IP-addresses/masks hooked up to a base -interface by adding a colon and a string when running ifconfig. -This string is usually numeric, but this is not a must. - -IP-Aliases are avail if CONFIG_INET (`standard' IPv4 networking) -is configured in the kernel. +IP-aliases are an obsolete way to manage multiple IP-addresses/masks +per interface. Newer tools such as iproute2 support multiple +address/prefixes per interface, but aliases are still supported +for backwards compatibility. +An alias is formed by adding a colon and a string when running ifconfig. +This string is usually numeric, but this is not a must. o Alias creation. Alias creation is done by 'magic' interface naming: eg. to create a @@ -38,16 +38,3 @@ o Relationship with main device If the base device is shut down the added aliases will be deleted too. - - -Contact -------- -Please finger or e-mail me: - Juan Jose Ciarlante - -Updated by Erik Schoenfelder - -; local variables: -; mode: indented-text -; mode: auto-fill -; end: -- cgit v1.2.3 From 9d8dba6c979fa99c96938c869611b9a23b73efa9 Mon Sep 17 00:00:00 2001 From: Benjamin Zores Date: Thu, 29 Jan 2009 16:19:13 -0800 Subject: ipv4: fix infinite retry loop in IP-Config Signed-off-by: Benjamin Zores Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 42a0f3dd3fd..d722013c1ca 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1268,6 +1268,9 @@ __be32 __init root_nfs_parse_addr(char *name) static int __init ip_auto_config(void) { __be32 addr; +#ifdef IPCONFIG_DYNAMIC + int retries = CONF_OPEN_RETRIES; +#endif #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1304,9 +1307,6 @@ static int __init ip_auto_config(void) #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC - - int retries = CONF_OPEN_RETRIES; - if (ic_dynamic() < 0) { ic_close_devs(); -- cgit v1.2.3 From df1c46b2b6876d0a1b1b4740f009fa69d95ebbc9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 29 Jan 2009 16:53:35 -0800 Subject: tun: Add some missing TUN compat ioctl translations. Based upon a report from Michael Tokarev : Just saw in dmesg: ioctl32(kvm:4408): Unknown cmd fd(9) cmd(800454cf){t:'T';sz:4} arg(ffc668e4) on /dev/net/tun Signed-off-by: David S. Miller --- fs/compat_ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 5235c67e759..c8f8d5904f5 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -538,6 +538,7 @@ static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg) * cannot be fixed without breaking all existing apps. */ case TUNSETIFF: + case TUNGETIFF: case SIOCGIFFLAGS: case SIOCGIFMETRIC: case SIOCGIFMTU: @@ -1982,6 +1983,11 @@ COMPATIBLE_IOCTL(TUNSETNOCSUM) COMPATIBLE_IOCTL(TUNSETDEBUG) COMPATIBLE_IOCTL(TUNSETPERSIST) COMPATIBLE_IOCTL(TUNSETOWNER) +COMPATIBLE_IOCTL(TUNSETLINK) +COMPATIBLE_IOCTL(TUNSETGROUP) +COMPATIBLE_IOCTL(TUNGETFEATURES) +COMPATIBLE_IOCTL(TUNSETOFFLOAD) +COMPATIBLE_IOCTL(TUNSETTXFILTER) /* Big V */ COMPATIBLE_IOCTL(VT_SETMODE) COMPATIBLE_IOCTL(VT_GETMODE) @@ -2573,6 +2579,7 @@ HANDLE_IOCTL(SIOCGIFPFLAGS, dev_ifsioc) HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc) HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc) HANDLE_IOCTL(TUNSETIFF, dev_ifsioc) +HANDLE_IOCTL(TUNGETIFF, dev_ifsioc) HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl) HANDLE_IOCTL(SIOCBONDENSLAVE, bond_ioctl) HANDLE_IOCTL(SIOCBONDRELEASE, bond_ioctl) -- cgit v1.2.3 From 584dbe9475313e117abf9d2af88164edfd429c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Thu, 29 Jan 2009 08:55:56 +0000 Subject: netxen: fix memory leak in drivers/net/netxen_nic_init.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For kernel bugzilla #12537: http://bugzilla.kernel.org/show_bug.cgi?id=12537 Free memory. Signed-off-by: Daniel Marjamäki Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index ca7c8d8050c..ffd37bea162 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -947,8 +947,10 @@ int netxen_pinit_from_rom(struct netxen_adapter *adapter, int verbose) } for (i = 0; i < n; i++) { if (netxen_rom_fast_read(adapter, 8*i + 4*offset, &val) != 0 || - netxen_rom_fast_read(adapter, 8*i + 4*offset + 4, &addr) != 0) + netxen_rom_fast_read(adapter, 8*i + 4*offset + 4, &addr) != 0) { + kfree(buf); return -EIO; + } buf[i].addr = addr; buf[i].data = val; -- cgit v1.2.3 From 1af7ad51049d6a310a19d497960597198290ddfa Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Thu, 29 Jan 2009 17:18:31 -0800 Subject: wimax: fix build issue when debugfs is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As reported by Toralf Förster and Randy Dunlap. - http://linuxwimax.org/pipermail/wimax/2009-January/000460.html - http://lkml.org/lkml/2009/1/29/279 The definitions needed for the wimax stack and i2400m driver debug infrastructure was, by mistake, compiled depending on CONFIG_DEBUG_FS (by them being placed in the debugfs.c files); thus the build broke in 2.6.29-rc3 when debugging was enabled (CONFIG_WIMAX_DEBUG) and DEBUG_FS was disabled. These definitions are always needed if debug is enabled at compile time (independently of DEBUG_FS being or not enabled), so moving them to a file that is always compiled fixes the issue. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/debugfs.c | 14 -------------- drivers/net/wimax/i2400m/driver.c | 16 ++++++++++++++++ net/wimax/debugfs.c | 11 ----------- net/wimax/stack.c | 13 +++++++++++++ 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/drivers/net/wimax/i2400m/debugfs.c b/drivers/net/wimax/i2400m/debugfs.c index 62663298597..9b81af3f80a 100644 --- a/drivers/net/wimax/i2400m/debugfs.c +++ b/drivers/net/wimax/i2400m/debugfs.c @@ -234,20 +234,6 @@ struct dentry *debugfs_create_i2400m_reset( &fops_i2400m_reset); } -/* - * Debug levels control; see debug.h - */ -struct d_level D_LEVEL[] = { - D_SUBMODULE_DEFINE(control), - D_SUBMODULE_DEFINE(driver), - D_SUBMODULE_DEFINE(debugfs), - D_SUBMODULE_DEFINE(fw), - D_SUBMODULE_DEFINE(netdev), - D_SUBMODULE_DEFINE(rfkill), - D_SUBMODULE_DEFINE(rx), - D_SUBMODULE_DEFINE(tx), -}; -size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); #define __debugfs_register(prefix, name, parent) \ do { \ diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c index 5f98047e18c..e80a0b65a75 100644 --- a/drivers/net/wimax/i2400m/driver.c +++ b/drivers/net/wimax/i2400m/driver.c @@ -707,6 +707,22 @@ void i2400m_release(struct i2400m *i2400m) EXPORT_SYMBOL_GPL(i2400m_release); +/* + * Debug levels control; see debug.h + */ +struct d_level D_LEVEL[] = { + D_SUBMODULE_DEFINE(control), + D_SUBMODULE_DEFINE(driver), + D_SUBMODULE_DEFINE(debugfs), + D_SUBMODULE_DEFINE(fw), + D_SUBMODULE_DEFINE(netdev), + D_SUBMODULE_DEFINE(rfkill), + D_SUBMODULE_DEFINE(rx), + D_SUBMODULE_DEFINE(tx), +}; +size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + + static int __init i2400m_driver_init(void) { diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c index 87cf4430079..94d216a4640 100644 --- a/net/wimax/debugfs.c +++ b/net/wimax/debugfs.c @@ -28,17 +28,6 @@ #include "debug-levels.h" -/* Debug framework control of debug levels */ -struct d_level D_LEVEL[] = { - D_SUBMODULE_DEFINE(debugfs), - D_SUBMODULE_DEFINE(id_table), - D_SUBMODULE_DEFINE(op_msg), - D_SUBMODULE_DEFINE(op_reset), - D_SUBMODULE_DEFINE(op_rfkill), - D_SUBMODULE_DEFINE(stack), -}; -size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); - #define __debugfs_register(prefix, name, parent) \ do { \ result = d_level_register_debugfs(prefix, name, parent); \ diff --git a/net/wimax/stack.c b/net/wimax/stack.c index d4da92f8981..3869c032788 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -516,6 +516,19 @@ void wimax_dev_rm(struct wimax_dev *wimax_dev) } EXPORT_SYMBOL_GPL(wimax_dev_rm); + +/* Debug framework control of debug levels */ +struct d_level D_LEVEL[] = { + D_SUBMODULE_DEFINE(debugfs), + D_SUBMODULE_DEFINE(id_table), + D_SUBMODULE_DEFINE(op_msg), + D_SUBMODULE_DEFINE(op_reset), + D_SUBMODULE_DEFINE(op_rfkill), + D_SUBMODULE_DEFINE(stack), +}; +size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + + struct genl_family wimax_gnl_family = { .id = GENL_ID_GENERATE, .name = "WiMAX", -- cgit v1.2.3 From b1c4a9dddf09fe99b8f88252718ac5b357363dc4 Mon Sep 17 00:00:00 2001 From: Haiying Wang Date: Thu, 29 Jan 2009 17:28:04 -0800 Subject: ucc_geth: Change uec phy id to the same format as gianfar's The commit b31a1d8b41513b96e9c7ec2f68c5734cef0b26a4 ("gianfar: Convert gianfar to an of_platform_driver") changes the gianfar's phy id to the format like "mdio@xxxx:xx", but uec still uses the old format like "xxxxxxxx:xx". For the board whose UEC uses gianfar-mdio like MPC8568MDS, the phy can not be attached because of the incompatible phy id format. This patch changes uec's phy id to the same format as gianfar's. Signed-off-by: Haiying Wang Signed-off-by: David S. Miller --- drivers/net/ucc_geth.c | 20 ++++++++++++++++++-- drivers/net/ucc_geth.h | 2 ++ drivers/net/ucc_geth_mii.c | 12 +++++++++++- drivers/net/ucc_geth_mii.h | 1 + 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index 11441225bf4..e87986867ba 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -1536,6 +1536,11 @@ static void adjust_link(struct net_device *dev) static int init_phy(struct net_device *dev) { struct ucc_geth_private *priv = netdev_priv(dev); + struct device_node *np = priv->node; + struct device_node *phy, *mdio; + const phandle *ph; + char bus_name[MII_BUS_ID_SIZE]; + const unsigned int *id; struct phy_device *phydev; char phy_id[BUS_ID_SIZE]; @@ -1543,8 +1548,18 @@ static int init_phy(struct net_device *dev) priv->oldspeed = 0; priv->oldduplex = -1; - snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->ug_info->mdio_bus, - priv->ug_info->phy_address); + ph = of_get_property(np, "phy-handle", NULL); + phy = of_find_node_by_phandle(*ph); + mdio = of_get_parent(phy); + + id = of_get_property(phy, "reg", NULL); + + of_node_put(phy); + of_node_put(mdio); + + uec_mdio_bus_name(bus_name, mdio); + snprintf(phy_id, sizeof(phy_id), "%s:%02x", + bus_name, *id); phydev = phy_connect(dev, phy_id, &adjust_link, 0, priv->phy_interface); @@ -3748,6 +3763,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma ugeth->ug_info = ug_info; ugeth->dev = dev; + ugeth->node = np; return 0; } diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h index 8f699cb773e..16cbe42ba43 100644 --- a/drivers/net/ucc_geth.h +++ b/drivers/net/ucc_geth.h @@ -1186,6 +1186,8 @@ struct ucc_geth_private { int oldspeed; int oldduplex; int oldlink; + + struct device_node *node; }; void uec_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c index c001d261366..54635911305 100644 --- a/drivers/net/ucc_geth_mii.c +++ b/drivers/net/ucc_geth_mii.c @@ -156,7 +156,7 @@ static int uec_mdio_probe(struct of_device *ofdev, const struct of_device_id *ma if (err) goto reg_map_fail; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start); + uec_mdio_bus_name(new_bus->id, np); new_bus->irq = kmalloc(32 * sizeof(int), GFP_KERNEL); @@ -283,3 +283,13 @@ void uec_mdio_exit(void) { of_unregister_platform_driver(&uec_mdio_driver); } + +void uec_mdio_bus_name(char *name, struct device_node *np) +{ + const u32 *reg; + + reg = of_get_property(np, "reg", NULL); + + snprintf(name, MII_BUS_ID_SIZE, "%s@%x", np->name, reg ? *reg : 0); +} + diff --git a/drivers/net/ucc_geth_mii.h b/drivers/net/ucc_geth_mii.h index 1e45b2028a5..840cf80235b 100644 --- a/drivers/net/ucc_geth_mii.h +++ b/drivers/net/ucc_geth_mii.h @@ -97,4 +97,5 @@ int uec_mdio_read(struct mii_bus *bus, int mii_id, int regnum); int uec_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value); int __init uec_mdio_init(void); void uec_mdio_exit(void); +void uec_mdio_bus_name(char *name, struct device_node *np); #endif /* __UEC_MII_H */ -- cgit v1.2.3 From 1609559547ae0ddc2e4829c7f78ac2c4869875b9 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Thu, 29 Jan 2009 17:29:15 -0800 Subject: smsc9420: fix interrupt signalling test failures smsc9420 performs an interrupt signalling test when the interface is brought up. The current code mistakenly sets its test flag to false AFTER enabling the software interrupt source, making failure quite likely. This patch changes the code to set the test flag BEFORE enabling interrupts. I've also removed an smp_wmb because the following spinlock provides an implicit memory barrier. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/smsc9420.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c index c14a4c6452c..d801900a503 100644 --- a/drivers/net/smsc9420.c +++ b/drivers/net/smsc9420.c @@ -1378,6 +1378,7 @@ static int smsc9420_open(struct net_device *dev) /* test the IRQ connection to the ISR */ smsc_dbg(IFUP, "Testing ISR using IRQ %d", dev->irq); + pd->software_irq_signal = false; spin_lock_irqsave(&pd->int_lock, flags); /* configure interrupt deassertion timer and enable interrupts */ @@ -1393,8 +1394,6 @@ static int smsc9420_open(struct net_device *dev) smsc9420_pci_flush_write(pd); timeout = 1000; - pd->software_irq_signal = false; - smp_wmb(); while (timeout--) { if (pd->software_irq_signal) break; -- cgit v1.2.3 From f307dbd88d82c4ccab7aec49613c366023b89cde Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Thu, 29 Jan 2009 17:30:00 -0800 Subject: smsc911x: timeout reaches -1 With a postfix decrement the timeout will reach -1 rather than 0, so the warning will not be issued. Signed-off-by: Roel Kluin Acked-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index f513bdf1c88..783c1a7b869 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -953,7 +953,7 @@ smsc911x_rx_fastforward(struct smsc911x_data *pdata, unsigned int pktbytes) do { udelay(1); val = smsc911x_reg_read(pdata, RX_DP_CTRL); - } while (timeout-- && (val & RX_DP_CTRL_RX_FFWD_)); + } while (--timeout && (val & RX_DP_CTRL_RX_FFWD_)); if (unlikely(timeout == 0)) SMSC_WARNING(HW, "Timed out waiting for " -- cgit v1.2.3 From e5664bb2a7fd8ae1bee1281c2e44653c471af9ca Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 29 Jan 2009 17:31:13 -0800 Subject: gianfar: Fix Wake-on-LAN support commit 0f0ca340e57bd7446855fefd07a64249acf81223 ("phy: power management support") caused a regression in the gianfar driver. Now phylib turns off PHY power during suspend, and thus WOL doesn't work anymore. This patch workarounds the issue by enabling wakeup in the MDIO device, i.e. just restores the old behaviour for the gianfar driver. Note that this way all PHYs on a given MDIO bus won't be turned off during suspend, which isn't good from the power saving point of view. A proper, per netdevice wakeup management support will need a bit reworked phylib suspend/resume logic. Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- drivers/net/gianfar_mii.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c index f3706e415b4..f49a426ad68 100644 --- a/drivers/net/gianfar_mii.c +++ b/drivers/net/gianfar_mii.c @@ -234,6 +234,8 @@ static int gfar_mdio_probe(struct of_device *ofdev, if (NULL == new_bus) return -ENOMEM; + device_init_wakeup(&ofdev->dev, 1); + new_bus->name = "Gianfar MII Bus", new_bus->read = &gfar_mdio_read, new_bus->write = &gfar_mdio_write, -- cgit v1.2.3 From c25b9abbc2c2c0da88e180c3933d6e773245815a Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Thu, 29 Jan 2009 17:32:20 -0800 Subject: drivers/net/skfp: if !capable(CAP_NET_ADMIN): inverted logic Fix inverted logic Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- drivers/net/skfp/skfddi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 607efeaf0bc..9a00e5566af 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -1003,9 +1003,9 @@ static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) break; case SKFP_CLR_STATS: /* Zero out the driver statistics */ if (!capable(CAP_NET_ADMIN)) { - memset(&lp->MacStat, 0, sizeof(lp->MacStat)); - } else { status = -EPERM; + } else { + memset(&lp->MacStat, 0, sizeof(lp->MacStat)); } break; default: -- cgit v1.2.3 From f99ec0649accb581cf3e8fcfeea796e82d05f4ea Mon Sep 17 00:00:00 2001 From: Philippe De Muyter Date: Thu, 29 Jan 2009 17:35:04 -0800 Subject: tulip: fix 21142 with 10Mbps without negotiation with current kernels, tulip 21142 ethernet controllers fail to connect to a 10Mbps only (i.e. without negotiation-partner) network. It used to work in 2.4 kernels. Fix that. Tested on a 21142 Rev 0x11. Signed-off-by: Philippe De Muyter Signed-off-by: David S. Miller --- drivers/net/tulip/21142.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/net/tulip/21142.c b/drivers/net/tulip/21142.c index 1210fb3748a..db7d5e11855 100644 --- a/drivers/net/tulip/21142.c +++ b/drivers/net/tulip/21142.c @@ -9,6 +9,11 @@ Please refer to Documentation/DocBook/tulip-user.{pdf,ps,html} for more information on this driver. + + DC21143 manual "21143 PCI/CardBus 10/100Mb/s Ethernet LAN Controller + Hardware Reference Manual" is currently available at : + http://developer.intel.com/design/network/manuals/278074.htm + Please submit bugs to http://bugzilla.kernel.org/ . */ @@ -32,7 +37,11 @@ void t21142_media_task(struct work_struct *work) int csr12 = ioread32(ioaddr + CSR12); int next_tick = 60*HZ; int new_csr6 = 0; + int csr14 = ioread32(ioaddr + CSR14); + /* CSR12[LS10,LS100] are not reliable during autonegotiation */ + if ((csr14 & 0x80) && (csr12 & 0x7000) != 0x5000) + csr12 |= 6; if (tulip_debug > 2) printk(KERN_INFO"%s: 21143 negotiation status %8.8x, %s.\n", dev->name, csr12, medianame[dev->if_port]); @@ -76,7 +85,7 @@ void t21142_media_task(struct work_struct *work) new_csr6 = 0x83860000; dev->if_port = 3; iowrite32(0, ioaddr + CSR13); - iowrite32(0x0003FF7F, ioaddr + CSR14); + iowrite32(0x0003FFFF, ioaddr + CSR14); iowrite16(8, ioaddr + CSR15); iowrite32(1, ioaddr + CSR13); } @@ -132,10 +141,14 @@ void t21142_lnk_change(struct net_device *dev, int csr5) struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->base_addr; int csr12 = ioread32(ioaddr + CSR12); + int csr14 = ioread32(ioaddr + CSR14); + /* CSR12[LS10,LS100] are not reliable during autonegotiation */ + if ((csr14 & 0x80) && (csr12 & 0x7000) != 0x5000) + csr12 |= 6; if (tulip_debug > 1) printk(KERN_INFO"%s: 21143 link status interrupt %8.8x, CSR5 %x, " - "%8.8x.\n", dev->name, csr12, csr5, ioread32(ioaddr + CSR14)); + "%8.8x.\n", dev->name, csr12, csr5, csr14); /* If NWay finished and we have a negotiated partner capability. */ if (tp->nway && !tp->nwayset && (csr12 & 0x7000) == 0x5000) { @@ -143,7 +156,9 @@ void t21142_lnk_change(struct net_device *dev, int csr5) int negotiated = tp->sym_advertise & (csr12 >> 16); tp->lpar = csr12 >> 16; tp->nwayset = 1; - if (negotiated & 0x0100) dev->if_port = 5; + /* If partner cannot negotiate, it is 10Mbps Half Duplex */ + if (!(csr12 & 0x8000)) dev->if_port = 0; + else if (negotiated & 0x0100) dev->if_port = 5; else if (negotiated & 0x0080) dev->if_port = 3; else if (negotiated & 0x0040) dev->if_port = 4; else if (negotiated & 0x0020) dev->if_port = 0; @@ -214,7 +229,7 @@ void t21142_lnk_change(struct net_device *dev, int csr5) tp->timer.expires = RUN_AT(3*HZ); add_timer(&tp->timer); } else if (dev->if_port == 5) - iowrite32(ioread32(ioaddr + CSR14) & ~0x080, ioaddr + CSR14); + iowrite32(csr14 & ~0x080, ioaddr + CSR14); } else if (dev->if_port == 0 || dev->if_port == 4) { if ((csr12 & 4) == 0) printk(KERN_INFO"%s: 21143 10baseT link beat good.\n", -- cgit v1.2.3 From b9ec63f78b425c0e16cc95605b5d4ff2dc228b97 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 30 Jan 2009 00:00:24 -0500 Subject: ext4: Remove bogus BUG() check in ext4_bmap() The code to support journal-less ext4 operation added a BUG to ext4_bmap() which fired if there was no journal and the EXT4_STATE_JDATA bit was set in the i_state field. This caused running the filefrag program (which uses the FIMBAP ioctl) to trigger a BUG(). The EXT4_STATE_JDATA bit is only used for ext4_bmap(), and it's harmless for the bit to be set. We could add a check in __ext4_journalled_writepage() and ext4_journalled_write_end() to only set the EXT4_STATE_JDATA bit if the journal is present, but that adds an extra test and jump instruction. It's easier to simply remove the BUG check. http://bugzilla.kernel.org/show_bug.cgi?id=12568 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b4386dafeb0..03ba20be132 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2821,9 +2821,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) filemap_write_and_wait(mapping); } - BUG_ON(!EXT4_JOURNAL(inode) && - EXT4_I(inode)->i_state & EXT4_STATE_JDATA); - if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { /* * This is a REALLY heavyweight approach, but the use of -- cgit v1.2.3 From 7b24fc4d7eb611da367dea3aad45473050aacd6c Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sun, 4 Jan 2009 02:43:38 -0500 Subject: block: Don't verify integrity metadata on read error If we get an I/O error on a read request there is no point in doing a verify pass on the integrity buffer. Adjust the completion path accordingly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 25 +++++++++++++++---------- include/linux/bio.h | 1 - 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 77ebc3c263d..8396d741f80 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -465,7 +465,7 @@ static int bio_integrity_verify(struct bio *bio) if (ret) { kunmap_atomic(kaddr, KM_USER0); - break; + return ret; } sectors = bv->bv_len / bi->sector_size; @@ -493,18 +493,13 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; - int error = bip->bip_error; + int error; - if (bio_integrity_verify(bio)) { - clear_bit(BIO_UPTODATE, &bio->bi_flags); - error = -EIO; - } + error = bio_integrity_verify(bio); /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - - if (bio->bi_end_io) - bio->bi_end_io(bio, error); + bio_endio(bio, error); } /** @@ -525,7 +520,17 @@ void bio_integrity_endio(struct bio *bio, int error) BUG_ON(bip->bip_bio != bio); - bip->bip_error = error; + /* In case of an I/O error there is no point in verifying the + * integrity metadata. Restore original bio end_io handler + * and run it. + */ + if (error) { + bio->bi_end_io = bip->bip_end_io; + bio_endio(bio, error); + + return; + } + INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bip->bip_work); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 18462c5b8ff..18fc4a281a7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -312,7 +312,6 @@ struct bio_integrity_payload { void *bip_buf; /* generated integrity data */ bio_end_io_t *bip_end_io; /* saved I/O completion fn */ - int bip_error; /* saved I/O error */ unsigned int bip_size; unsigned short bip_pool; /* pool the ivec came from */ -- cgit v1.2.3 From 8ae372e3bb4acaca37ffa2ce54f4cf8dd60a94fa Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sun, 4 Jan 2009 02:43:39 -0500 Subject: block: Remove obsolete BUG_ON Now that bio_vecs are no longer cleared in bvec_alloc_bs() the following BUG_ON must go. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8396d741f80..549b0144da1 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -140,7 +140,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, iv = bip_vec_idx(bip, bip->bip_vcnt); BUG_ON(iv == NULL); - BUG_ON(iv->bv_page != NULL); iv->bv_page = page; iv->bv_len = len; -- cgit v1.2.3 From 322316385dde5cd879e682bcb598c56d0659fb60 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sun, 4 Jan 2009 02:43:40 -0500 Subject: block: Allow empty integrity profile Allow a block device to allocate and register an integrity profile without providing a template. This allows DM to preallocate a profile to avoid deadlocks during table reconfiguration. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-integrity.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 61a8e2f8fdd..91fa8e06b6a 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -309,24 +309,24 @@ static struct kobj_type integrity_ktype = { /** * blk_integrity_register - Register a gendisk as being integrity-capable * @disk: struct gendisk pointer to make integrity-aware - * @template: integrity profile + * @template: optional integrity profile to register * * Description: When a device needs to advertise itself as being able * to send/receive integrity metadata it must use this function to * register the capability with the block layer. The template is a * blk_integrity struct with values appropriate for the underlying - * hardware. See Documentation/block/data-integrity.txt. + * hardware. If template is NULL the new profile is allocated but + * not filled out. See Documentation/block/data-integrity.txt. */ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) { struct blk_integrity *bi; BUG_ON(disk == NULL); - BUG_ON(template == NULL); if (disk->integrity == NULL) { bi = kmem_cache_alloc(integrity_cachep, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL | __GFP_ZERO); if (!bi) return -1; @@ -346,13 +346,16 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) bi = disk->integrity; /* Use the provided profile as template */ - bi->name = template->name; - bi->generate_fn = template->generate_fn; - bi->verify_fn = template->verify_fn; - bi->tuple_size = template->tuple_size; - bi->set_tag_fn = template->set_tag_fn; - bi->get_tag_fn = template->get_tag_fn; - bi->tag_size = template->tag_size; + if (template != NULL) { + bi->name = template->name; + bi->generate_fn = template->generate_fn; + bi->verify_fn = template->verify_fn; + bi->tuple_size = template->tuple_size; + bi->set_tag_fn = template->set_tag_fn; + bi->get_tag_fn = template->get_tag_fn; + bi->tag_size = template->tag_size; + } else + bi->name = "unsupported"; return 0; } -- cgit v1.2.3 From f48fc4d32e24c0b6a18aad30305d819bcc68c049 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Jan 2009 10:17:25 +0100 Subject: block: get rid of the manual directory counting in blktrace It can result in a stuck blktrace system, if --kill is used. Signed-off-by: Jens Axboe --- block/blktrace.c | 72 +++++++++++++++++--------------------------------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/block/blktrace.c b/block/blktrace.c index b0a2cae886d..39cc3bfe56e 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -187,59 +187,12 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, static struct dentry *blk_tree_root; static DEFINE_MUTEX(blk_tree_mutex); -static unsigned int root_users; - -static inline void blk_remove_root(void) -{ - if (blk_tree_root) { - debugfs_remove(blk_tree_root); - blk_tree_root = NULL; - } -} - -static void blk_remove_tree(struct dentry *dir) -{ - mutex_lock(&blk_tree_mutex); - debugfs_remove(dir); - if (--root_users == 0) - blk_remove_root(); - mutex_unlock(&blk_tree_mutex); -} - -static struct dentry *blk_create_tree(const char *blk_name) -{ - struct dentry *dir = NULL; - int created = 0; - - mutex_lock(&blk_tree_mutex); - - if (!blk_tree_root) { - blk_tree_root = debugfs_create_dir("block", NULL); - if (!blk_tree_root) - goto err; - created = 1; - } - - dir = debugfs_create_dir(blk_name, blk_tree_root); - if (dir) - root_users++; - else { - /* Delete root only if we created it */ - if (created) - blk_remove_root(); - } - -err: - mutex_unlock(&blk_tree_mutex); - return dir; -} static void blk_trace_cleanup(struct blk_trace *bt) { - relay_close(bt->rchan); debugfs_remove(bt->msg_file); debugfs_remove(bt->dropped_file); - blk_remove_tree(bt->dir); + relay_close(bt->rchan); free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); @@ -346,7 +299,18 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, static int blk_remove_buf_file_callback(struct dentry *dentry) { + struct dentry *parent = dentry->d_parent; debugfs_remove(dentry); + + /* + * this will fail for all but the last file, but that is ok. what we + * care about is the top level buts->name directory going away, when + * the last trace file is gone. Then we don't have to rmdir() that + * manually on trace stop, so it nicely solves the issue with + * force killing of running traces. + */ + + debugfs_remove(parent); return 0; } @@ -404,7 +368,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, goto err; ret = -ENOENT; - dir = blk_create_tree(buts->name); + + if (!blk_tree_root) { + blk_tree_root = debugfs_create_dir("block", NULL); + if (!blk_tree_root) + return -ENOMEM; + } + + dir = debugfs_create_dir(buts->name, blk_tree_root); + if (!dir) goto err; @@ -458,8 +430,6 @@ probe_err: atomic_dec(&blk_probes_ref); mutex_unlock(&blk_probe_mutex); err: - if (dir) - blk_remove_tree(dir); if (bt) { if (bt->msg_file) debugfs_remove(bt->msg_file); -- cgit v1.2.3 From 16642eb68216d8e0e136a99e514e9166e7125838 Mon Sep 17 00:00:00 2001 From: Alberto Bertogli Date: Mon, 5 Jan 2009 10:18:53 +0100 Subject: Fix small typo in bio.h's documentation Signed-off-by: Alberto Bertogli Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 18fc4a281a7..5175aa3103c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -144,7 +144,7 @@ struct bio { * bit 1 -- rw-ahead when set * bit 2 -- barrier * Insert a serialization point in the IO queue, forcing previously - * submitted IO to be completed before this oen is issued. + * submitted IO to be completed before this one is issued. * bit 3 -- synchronous I/O hint: the block layer will unplug immediately * Note that this does NOT indicate that the IO itself is sync, just * that the block layer will not postpone issue of this IO by plugging. -- cgit v1.2.3 From 1308835ffffe6d61ad1f48c5c381c9cc47f683ec Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 7 Jan 2009 12:22:39 +0100 Subject: block: export SSD/non-rotational queue flag through sysfs For some devices (i.e. CFA ATA) we can't reliably detect whether the device is of rotational or non-rotational type so we need to leave the final decision about this setting to the user-space. As a bonus do a minor CodingStyle fixup in queue_nomerges_store(). Suggested-by: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index a29cb788e40..b538ab8dbbd 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -130,6 +130,27 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_hw_sectors_kb, (page)); } +static ssize_t queue_nonrot_show(struct request_queue *q, char *page) +{ + return queue_var_show(!blk_queue_nonrot(q), page); +} + +static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long nm; + ssize_t ret = queue_var_store(&nm, page, count); + + spin_lock_irq(q->queue_lock); + if (nm) + queue_flag_clear(QUEUE_FLAG_NONROT, q); + else + queue_flag_set(QUEUE_FLAG_NONROT, q); + spin_unlock_irq(q->queue_lock); + + return ret; +} + static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { return queue_var_show(blk_queue_nomerges(q), page); @@ -146,8 +167,8 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, queue_flag_set(QUEUE_FLAG_NOMERGES, q); else queue_flag_clear(QUEUE_FLAG_NOMERGES, q); - spin_unlock_irq(q->queue_lock); + return ret; } @@ -210,6 +231,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { .show = queue_hw_sector_size_show, }; +static struct queue_sysfs_entry queue_nonrot_entry = { + .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, + .show = queue_nonrot_show, + .store = queue_nonrot_store, +}; + static struct queue_sysfs_entry queue_nomerges_entry = { .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR }, .show = queue_nomerges_show, @@ -229,6 +256,7 @@ static struct attribute *default_attrs[] = { &queue_max_sectors_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, + &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, NULL, -- cgit v1.2.3 From 213d9417fec62ef4c3675621b9364a667954d4dd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Jan 2009 09:16:05 +0100 Subject: block: seperate bio/request unplug and sync bits Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ++++- include/linux/bio.h | 18 +++++++++++------- include/linux/blkdev.h | 2 ++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index a824e49c0d0..9e2e86fb78b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1125,6 +1125,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; + if (bio_unplug(bio)) + req->cmd_flags |= REQ_UNPLUG; if (bio_rw_meta(bio)) req->cmd_flags |= REQ_RW_META; @@ -1141,6 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) int el_ret, nr_sectors; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); + const int unplug = bio_unplug(bio); int rw_flags; nr_sectors = bio_sectors(bio); @@ -1244,7 +1247,7 @@ get_rq: blk_plug_device(q); add_request(q, req); out: - if (sync || blk_queue_nonrot(q)) + if (unplug || blk_queue_nonrot(q)) __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); return 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index 5175aa3103c..f53568c5852 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -163,12 +163,15 @@ struct bio { #define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ #define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ #define BIO_RW_BARRIER 2 -#define BIO_RW_SYNC 3 -#define BIO_RW_META 4 -#define BIO_RW_DISCARD 5 -#define BIO_RW_FAILFAST_DEV 6 -#define BIO_RW_FAILFAST_TRANSPORT 7 -#define BIO_RW_FAILFAST_DRIVER 8 +#define BIO_RW_SYNCIO 3 +#define BIO_RW_UNPLUG 4 +#define BIO_RW_META 5 +#define BIO_RW_DISCARD 6 +#define BIO_RW_FAILFAST_DEV 7 +#define BIO_RW_FAILFAST_TRANSPORT 8 +#define BIO_RW_FAILFAST_DRIVER 9 + +#define BIO_RW_SYNC (BIO_RW_SYNCIO | BIO_RW_UNPLUG) /* * upper 16 bits of bi_rw define the io priority of this bio @@ -194,7 +197,8 @@ struct bio { #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) -#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) +#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNCIO)) +#define bio_unplug(bio) ((bio)->bi_rw & (1 << BIO_RW_UNPLUG)) #define bio_failfast_dev(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV)) #define bio_failfast_transport(bio) \ ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 044467ef7b1..75426e4b8cd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -108,6 +108,7 @@ enum rq_flag_bits { __REQ_RW_META, /* metadata io request */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ + __REQ_UNPLUG, /* unplug queue on submission */ __REQ_NR_BITS, /* stops here */ }; @@ -134,6 +135,7 @@ enum rq_flag_bits { #define REQ_RW_META (1 << __REQ_RW_META) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) +#define REQ_UNPLUG (1 << __REQ_UNPLUG) #define BLK_MAX_CDB 16 -- cgit v1.2.3 From 1dfa17f4ab8543d82caf4d36636b93916a18f456 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Jan 2009 09:21:49 +0100 Subject: block: add bio_rw_flagged() for testing bio->bi_rw The existing functions for checking bio->bi_rw are badly named. So lets mirror what we do for bio->bi_flags testing, use a properly named function so that it's immediately obvious what is being tested. Maintain compatability names for the old macros, eventually we'll get rid of these. Signed-off-by: Jens Axboe --- include/linux/bio.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index f53568c5852..0942765cf8c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -173,6 +173,24 @@ struct bio { #define BIO_RW_SYNC (BIO_RW_SYNCIO | BIO_RW_UNPLUG) +#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) + +/* + * Old defines, these should eventually be replaced by direct usage of + * bio_rw_flagged() + */ +#define bio_barrier(bio) bio_rw_flagged(bio, BIO_RW_BARRIER) +#define bio_sync(bio) bio_rw_flagged(bio, BIO_RW_SYNCIO) +#define bio_unplug(bio) bio_rw_flagged(bio, BIO_RW_UNPLUG) +#define bio_failfast_dev(bio) bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV) +#define bio_failfast_transport(bio) \ + bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT) +#define bio_failfast_driver(bio) \ + bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER) +#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD) +#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META) +#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD) + /* * upper 16 bits of bi_rw define the io priority of this bio */ @@ -196,16 +214,6 @@ struct bio { #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) -#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNCIO)) -#define bio_unplug(bio) ((bio)->bi_rw & (1 << BIO_RW_UNPLUG)) -#define bio_failfast_dev(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DEV)) -#define bio_failfast_transport(bio) \ - ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT)) -#define bio_failfast_driver(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DRIVER)) -#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) -#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) -- cgit v1.2.3 From dbdac9b71dff5d27885f82eb2cfca310861fdf9e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 13 Jan 2009 15:27:32 +0100 Subject: block: Fix documentation for blkdev_issue_flush() Signed-off-by: "Theodore Ts'o" Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 8eba4e43bb0..f7dae57e6ca 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -302,7 +302,7 @@ static void bio_end_empty_barrier(struct bio *bio, int err) * Description: * Issue a flush for the block device in question. Caller can supply * room for storing the error offset in case of a flush error, if they - * wish to. Caller must run wait_for_completion() on its own. + * wish to. */ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) { -- cgit v1.2.3 From cec0707e40ae25794b5a2de7b7f03c51961f80d9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Jan 2009 15:28:32 +0100 Subject: block: silently error an unsupported barrier bio This fixes a "regression" from 2.6.28, where the barrier probes that file systems may do would trigger additional end request warnings in dmesg. Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 9e2e86fb78b..ae75c047f45 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1451,6 +1451,11 @@ static inline void __generic_make_request(struct bio *bio) err = -EOPNOTSUPP; goto end_io; } + if (bio_barrier(bio) && bio_has_data(bio) && + (q->next_ordered == QUEUE_ORDERED_NONE)) { + err = -EOPNOTSUPP; + goto end_io; + } ret = q->make_request_fn(q, bio); } while (ret); -- cgit v1.2.3 From a229fc61ef0ee3c30fd193beee0eeb87410227f1 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 19 Jan 2009 10:37:38 +0100 Subject: include/linux: Add bsg.h to the Kernel exported headers bsg.h in current form is perfectly suitable for user-mode consumption. It is needed together with scsi/sg.h for applications that want to interface with the bsg driver. Currently the few projects that use it would copy it over into the projects. But that is not acceptable for projects that need to provide source and devel packages for distros. This should also be submitted to stable 2.6.28 and 2.6.27 since bsg had a stable API since these Kernels and distro users will need the header for these kernels a swell Signed-off-by: Boaz Harrosh Acked-by: FUJITA Tomonori CC: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 12e9a2957ca..2124c063a7e 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -41,6 +41,7 @@ header-y += baycom.h header-y += bfs_fs.h header-y += blkpg.h header-y += bpqether.h +header-y += bsg.h header-y += can.h header-y += cdk.h header-y += chio.h -- cgit v1.2.3 From 7598909e3ee2a08726276d6415b69dadb52d0d76 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Tue, 27 Jan 2009 09:29:24 +0100 Subject: Mark mandatory elevator functions in the biodoc.txt biodoc.txt mentions that elevator functions marked with * are mandatory, but no function is marked with *. Mark the 3 functions which should be implemented by any io scheduler. Signed-off-by: Nikanth Karthikesan Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 5d2480d33b4..ecad6ee7570 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -954,14 +954,14 @@ elevator_allow_merge_fn called whenever the block layer determines results in some sort of conflict internally, this hook allows it to do that. -elevator_dispatch_fn fills the dispatch queue with ready requests. +elevator_dispatch_fn* fills the dispatch queue with ready requests. I/O schedulers are free to postpone requests by not filling the dispatch queue unless @force is non-zero. Once dispatched, I/O schedulers are not allowed to manipulate the requests - they belong to generic dispatch queue. -elevator_add_req_fn called to add a new request into the scheduler +elevator_add_req_fn* called to add a new request into the scheduler elevator_queue_empty_fn returns true if the merge queue is empty. Drivers shouldn't use this, but rather check @@ -991,7 +991,7 @@ elevator_activate_req_fn Called when device driver first sees a request. elevator_deactivate_req_fn Called when device driver decides to delay a request by requeueing it. -elevator_init_fn +elevator_init_fn* elevator_exit_fn Allocate and free any elevator specific storage for a queue. -- cgit v1.2.3 From bc58ba9468d94d62c56ab9b47173583ec140b165 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Jan 2009 10:54:44 +0100 Subject: block: add sysfs file for controlling io stats accounting This allows us to turn off disk stat accounting completely, for the cases where the 0.5-1% reduction in system time is important. Signed-off-by: Jens Axboe --- block/blk-core.c | 90 ++++++++++++++++++++++++++++++-------------------- block/blk-sysfs.c | 28 ++++++++++++++++ include/linux/blkdev.h | 6 ++++ 3 files changed, 88 insertions(+), 36 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index ae75c047f45..ca69f3d9410 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -64,11 +64,12 @@ static struct workqueue_struct *kblockd_workqueue; static void drive_stat_acct(struct request *rq, int new_io) { + struct gendisk *disk = rq->rq_disk; struct hd_struct *part; int rw = rq_data_dir(rq); int cpu; - if (!blk_fs_request(rq) || !rq->rq_disk) + if (!blk_fs_request(rq) || !disk || !blk_queue_io_stat(disk->queue)) return; cpu = part_stat_lock(); @@ -599,8 +600,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->request_fn = rfn; q->prep_rq_fn = NULL; q->unplug_fn = generic_unplug_device; - q->queue_flags = (1 << QUEUE_FLAG_CLUSTER | - 1 << QUEUE_FLAG_STACKABLE); + q->queue_flags = QUEUE_FLAG_DEFAULT; q->queue_lock = lock; blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK); @@ -1663,6 +1663,55 @@ void blkdev_dequeue_request(struct request *req) } EXPORT_SYMBOL(blkdev_dequeue_request); +static void blk_account_io_completion(struct request *req, unsigned int bytes) +{ + struct gendisk *disk = req->rq_disk; + + if (!disk || !blk_queue_io_stat(disk->queue)) + return; + + if (blk_fs_request(req)) { + const int rw = rq_data_dir(req); + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); + part_stat_add(cpu, part, sectors[rw], bytes >> 9); + part_stat_unlock(); + } +} + +static void blk_account_io_done(struct request *req) +{ + struct gendisk *disk = req->rq_disk; + + if (!disk || !blk_queue_io_stat(disk->queue)) + return; + + /* + * Account IO completion. bar_rq isn't accounted as a normal + * IO on queueing nor completion. Accounting the containing + * request is enough. + */ + if (blk_fs_request(req) && req != &req->q->bar_rq) { + unsigned long duration = jiffies - req->start_time; + const int rw = rq_data_dir(req); + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(disk, req->sector); + + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_round_stats(cpu, part); + part_dec_in_flight(part); + + part_stat_unlock(); + } +} + /** * __end_that_request_first - end I/O on a request * @req: the request being processed @@ -1698,16 +1747,7 @@ static int __end_that_request_first(struct request *req, int error, (unsigned long long)req->sector); } - if (blk_fs_request(req) && req->rq_disk) { - const int rw = rq_data_dir(req); - struct hd_struct *part; - int cpu; - - cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); - part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); - part_stat_unlock(); - } + blk_account_io_completion(req, nr_bytes); total_bytes = bio_nbytes = 0; while ((bio = req->bio) != NULL) { @@ -1787,8 +1827,6 @@ static int __end_that_request_first(struct request *req, int error, */ static void end_that_request_last(struct request *req, int error) { - struct gendisk *disk = req->rq_disk; - if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1800,27 +1838,7 @@ static void end_that_request_last(struct request *req, int error) blk_delete_timer(req); - /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. - */ - if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { - unsigned long duration = jiffies - req->start_time; - const int rw = rq_data_dir(req); - struct hd_struct *part; - int cpu; - - cpu = part_stat_lock(); - part = disk_map_sector_rcu(disk, req->sector); - - part_stat_inc(cpu, part, ios[rw]); - part_stat_add(cpu, part, ticks[rw], duration); - part_round_stats(cpu, part); - part_dec_in_flight(part); - - part_stat_unlock(); - } + blk_account_io_done(req); if (req->end_io) req->end_io(req, error); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b538ab8dbbd..e29ddfc73cf 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -197,6 +197,27 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } +static ssize_t queue_iostats_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_io_stat(q), page); +} + +static ssize_t queue_iostats_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long stats; + ssize_t ret = queue_var_store(&stats, page, count); + + spin_lock_irq(q->queue_lock); + if (stats) + queue_flag_set(QUEUE_FLAG_IO_STAT, q); + else + queue_flag_clear(QUEUE_FLAG_IO_STAT, q); + spin_unlock_irq(q->queue_lock); + + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -249,6 +270,12 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = { .store = queue_rq_affinity_store, }; +static struct queue_sysfs_entry queue_iostats_entry = { + .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, + .show = queue_iostats_show, + .store = queue_iostats_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -259,6 +286,7 @@ static struct attribute *default_attrs[] = { &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, + &queue_iostats_entry.attr, NULL, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 75426e4b8cd..d08c4b8219a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -451,6 +451,11 @@ struct request_queue #define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ +#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ + +#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ + (1 << QUEUE_FLAG_CLUSTER) | \ + 1 << QUEUE_FLAG_STACKABLE) static inline int queue_is_locked(struct request_queue *q) { @@ -567,6 +572,7 @@ enum { #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) -- cgit v1.2.3 From 3a9a3f6cc55418dd1525e636dccbbe13c394f652 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Fri, 30 Jan 2009 12:46:41 +0100 Subject: cfq-iosched: Allow RT requests to pre-empt ongoing BE timeslice This patch adds the ability to pre-empt an ongoing BE timeslice when a RT request is waiting for the current timeslice to complete. This reduces the wait time to disk for RT requests from an upper bound of 4 (current value of cfq_quantum) to 1 disk request. Applied Jens' suggeested changes to avoid the rb lookup and use !cfq_class_rt() and retested. Latency(secs) for the RT task when doing sequential reads from 10G file. | only RT | RT + BE | RT + BE + this patch small (512 byte) reads | 143 | 163 | 145 large (1Mb) reads | 142 | 158 | 146 Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e8525fa7282..664ebfd092e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -84,6 +84,11 @@ struct cfq_data { */ struct cfq_rb_root service_tree; unsigned int busy_queues; + /* + * Used to track any pending rt requests so we can pre-empt current + * non-RT cfqq in service when this value is non-zero. + */ + unsigned int busy_rt_queues; int rq_in_driver; int sync_flight; @@ -562,6 +567,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(cfq_cfqq_on_rr(cfqq)); cfq_mark_cfqq_on_rr(cfqq); cfqd->busy_queues++; + if (cfq_class_rt(cfqq)) + cfqd->busy_rt_queues++; cfq_resort_rr_list(cfqd, cfqq); } @@ -581,6 +588,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; + if (cfq_class_rt(cfqq)) + cfqd->busy_rt_queues--; } /* @@ -1004,6 +1013,20 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) if (cfq_slice_used(cfqq)) goto expire; + /* + * If we have a RT cfqq waiting, then we pre-empt the current non-rt + * cfqq. + */ + if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) { + /* + * We simulate this as cfqq timed out so that it gets to bank + * the remaining of its time slice. + */ + cfq_log_cfqq(cfqd, cfqq, "preempt"); + cfq_slice_expired(cfqd, 1); + goto new_queue; + } + /* * The active queue has requests and isn't expired, allow it to * dispatch. @@ -1067,6 +1090,13 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (RB_EMPTY_ROOT(&cfqq->sort_list)) break; + /* + * If there is a non-empty RT cfqq waiting for current + * cfqq's timeslice to complete, pre-empt this cfqq + */ + if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) + break; + } while (dispatched < max_dispatch); /* @@ -1801,6 +1831,12 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, if (rq_is_meta(rq) && !cfqq->meta_pending) return 1; + /* + * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. + */ + if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) + return 1; + if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq)) return 0; @@ -1870,7 +1906,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * not the active queue - expire current slice if it is * idle and has expired it's mean thinktime or this new queue - * has some old slice time left and is of higher priority + * has some old slice time left and is of higher priority or + * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); cfq_mark_cfqq_must_dispatch(cfqq); -- cgit v1.2.3 From 0461ec5bc7745b89a8ab67ba0ea497abd58a6301 Mon Sep 17 00:00:00 2001 From: Paul Larson Date: Fri, 30 Jan 2009 10:21:49 -0600 Subject: Add enable_ms to jsm driver This fixes a crash observed when non-existant enable_ms function is called for jsm driver. Signed-off-by: Scott Kilau Signed-off-by: Paul Larson Signed-off-by: Linus Torvalds --- drivers/serial/jsm/jsm_tty.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c index 3547558d2ca..324c74d2f66 100644 --- a/drivers/serial/jsm/jsm_tty.c +++ b/drivers/serial/jsm/jsm_tty.c @@ -161,6 +161,11 @@ static void jsm_tty_stop_rx(struct uart_port *port) channel->ch_bd->bd_ops->disable_receiver(channel); } +static void jsm_tty_enable_ms(struct uart_port *port) +{ + /* Nothing needed */ +} + static void jsm_tty_break(struct uart_port *port, int break_state) { unsigned long lock_flags; @@ -345,6 +350,7 @@ static struct uart_ops jsm_ops = { .start_tx = jsm_tty_start_tx, .send_xchar = jsm_tty_send_xchar, .stop_rx = jsm_tty_stop_rx, + .enable_ms = jsm_tty_enable_ms, .break_ctl = jsm_tty_break, .startup = jsm_tty_open, .shutdown = jsm_tty_close, -- cgit v1.2.3 From 33bfad54b58cf05cfe6678c3ec9235d4bc8db4c2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Jan 2009 11:37:22 -0800 Subject: Allow opportunistic merging of VM_CAN_NONLINEAR areas Commit de33c8db5910cda599899dd431cc30d7c1018cbf ("Fix OOPS in mmap_region() when merging adjacent VM_LOCKED file segments") unified the vma merging of anonymous and file maps to just one place, which simplified the code and fixed a use-after-free bug that could cause an oops. But by doing the merge opportunistically before even having called ->mmap() on the file method, it now compares two different 'vm_flags' values: the pre-mmap() value of the new not-yet-formed vma, and previous mappings of the same file around it. And in doing so, it refused to merge the common file case, which adds a marker to say "I can be made non-linear". This fixes it by just adding a set of flags that don't have to match, because we know they are ok to merge. Currently it's only that single VM_CAN_NONLINEAR flag, but at least conceptually there could be others in the future. Reported-and-acked-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Nick Piggin Cc: Andrew Morton Cc: Greg KH Signed-off-by: Linus Torvalds --- mm/mmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index d3fa10a726c..c581df14d0d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -658,6 +658,9 @@ again: remove_next = 1 + (end > next->vm_end); validate_mm(mm); } +/* Flags that can be inherited from an existing mapping when merging */ +#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR) + /* * If the vma has a ->close operation then the driver probably needs to release * per-vma resources, so we don't attempt to merge those. @@ -665,7 +668,7 @@ again: remove_next = 1 + (end > next->vm_end); static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { - if (vma->vm_flags != vm_flags) + if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS) return 0; if (vma->vm_file != file) return 0; -- cgit v1.2.3