diff options
Diffstat (limited to 'drivers/net/ixgbe')
-rw-r--r-- | drivers/net/ixgbe/ixgbe.h | 87 | ||||
-rw-r--r-- | drivers/net/ixgbe/ixgbe_ethtool.c | 39 | ||||
-rw-r--r-- | drivers/net/ixgbe/ixgbe_main.c | 1563 |
3 files changed, 1314 insertions, 375 deletions
diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index d0bf206632c..d98113472a8 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -36,6 +36,9 @@ #include "ixgbe_type.h" #include "ixgbe_common.h" +#ifdef CONFIG_DCA +#include <linux/dca.h> +#endif #define IXGBE_ERR(args...) printk(KERN_ERR "ixgbe: " args) @@ -120,7 +123,6 @@ struct ixgbe_queue_stats { }; struct ixgbe_ring { - struct ixgbe_adapter *adapter; /* backlink */ void *desc; /* descriptor ring memory */ dma_addr_t dma; /* phys. address of descriptor ring */ unsigned int size; /* length in bytes */ @@ -128,6 +130,7 @@ struct ixgbe_ring { unsigned int next_to_use; unsigned int next_to_clean; + int queue_index; /* needed for multiqueue queue management */ union { struct ixgbe_tx_buffer *tx_buffer_info; struct ixgbe_rx_buffer *rx_buffer_info; @@ -136,8 +139,21 @@ struct ixgbe_ring { u16 head; u16 tail; + unsigned int total_bytes; + unsigned int total_packets; + u16 reg_idx; /* holds the special value that gets the hardware register + * offset associated with this ring, which is different + * for DCE and RSS modes */ + +#ifdef CONFIG_DCA + /* cpu for tx queue */ + int cpu; +#endif struct ixgbe_queue_stats stats; + u16 v_idx; /* bit mask (1 << vector index) of this ring's vector as + * written to EIMS/EIMC; 16 bits wide because up to + * MAX_MSIX_Q_VECTORS (16) queue vectors are mapped */ u32 eims_value; u16 itr_register; @@ -146,6 +162,33 @@ struct ixgbe_ring { u16 work_limit; /* max work per interrupt */ }; +#define RING_F_VMDQ 1 +#define RING_F_RSS 2 +#define IXGBE_MAX_RSS_INDICES 16 +#define IXGBE_MAX_VMDQ_INDICES 16 +struct ixgbe_ring_feature { + int indices; + int mask; +}; + +#define MAX_RX_QUEUES 64 +#define MAX_TX_QUEUES 32 + +/* MAX_MSIX_Q_VECTORS of these are allocated, + * but we only use one per queue-specific vector. 
+ */ +struct ixgbe_q_vector { + struct ixgbe_adapter *adapter; + struct napi_struct napi; + DECLARE_BITMAP(rxr_idx, MAX_RX_QUEUES); /* Rx ring indices */ + DECLARE_BITMAP(txr_idx, MAX_TX_QUEUES); /* Tx ring indices */ + u8 rxr_count; /* Rx ring count assigned to this vector */ + u8 txr_count; /* Tx ring count assigned to this vector */ + u8 tx_eitr; + u8 rx_eitr; + u32 eitr; +}; + /* Helper macros to switch between ints/sec and what the register uses. * And yes, it's the same math going both ways. */ @@ -166,6 +209,14 @@ struct ixgbe_ring { #define IXGBE_MAX_JUMBO_FRAME_SIZE 16128 +#define OTHER_VECTOR 1 +#define NON_Q_VECTORS (OTHER_VECTOR) + +#define MAX_MSIX_Q_VECTORS 16 +#define MIN_MSIX_Q_VECTORS 2 +#define MAX_MSIX_COUNT (MAX_MSIX_Q_VECTORS + NON_Q_VECTORS) +#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS) + /* board specific private data structure */ struct ixgbe_adapter { struct timer_list watchdog_timer; @@ -173,10 +224,16 @@ struct ixgbe_adapter { u16 bd_number; u16 rx_buf_len; struct work_struct reset_task; + struct ixgbe_q_vector q_vector[MAX_MSIX_Q_VECTORS]; + char name[MAX_MSIX_COUNT][IFNAMSIZ + 5]; + + /* Interrupt Throttle Rate */ + u32 itr_setting; + u16 eitr_low; + u16 eitr_high; /* TX */ struct ixgbe_ring *tx_ring; /* One per active queue */ - struct napi_struct napi; u64 restart_queue; u64 lsc_int; u64 hw_tso_ctxt; @@ -192,22 +249,27 @@ struct ixgbe_adapter { u64 non_eop_descs; int num_tx_queues; int num_rx_queues; + int num_msix_vectors; + struct ixgbe_ring_feature ring_feature[3]; struct msix_entry *msix_entries; u64 rx_hdr_split; u32 alloc_rx_page_failed; u32 alloc_rx_buff_failed; + /* Some features need tri-state capability, + * thus the additional *_CAPABLE flags. 
+ */ u32 flags; -#define IXGBE_FLAG_RX_CSUM_ENABLED (u32)(1) +#define IXGBE_FLAG_RX_CSUM_ENABLED (u32)(1 << 0) #define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1) -#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 2) -#define IXGBE_FLAG_RX_PS_ENABLED (u32)(1 << 3) -#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 4) - - /* Interrupt Throttle Rate */ - u32 rx_eitr; - u32 tx_eitr; +#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 2) +#define IXGBE_FLAG_RX_PS_ENABLED (u32)(1 << 3) +#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 4) +#define IXGBE_FLAG_IMIR_ENABLED (u32)(1 << 5) +#define IXGBE_FLAG_RSS_ENABLED (u32)(1 << 6) +#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 7) +#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 8) /* OS defined structs */ struct net_device *netdev; @@ -218,7 +280,10 @@ struct ixgbe_adapter { struct ixgbe_hw hw; u16 msg_enable; struct ixgbe_hw_stats stats; - char lsc_name[IFNAMSIZ + 5]; + + /* Interrupt Throttle Rate */ + u32 rx_eitr; + u32 tx_eitr; unsigned long state; u64 tx_busy; diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index a119cbd8dbb..4e463778bcf 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -246,13 +246,26 @@ static int ixgbe_set_tx_csum(struct net_device *netdev, u32 data) static int ixgbe_set_tso(struct net_device *netdev, u32 data) { - if (data) { netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; } else { +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int i; +#endif + netif_stop_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +#endif netdev->features &= ~NETIF_F_TSO; netdev->features &= ~NETIF_F_TSO6; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_start_subqueue(netdev, i); +#endif + netif_start_queue(netdev); } return 0; } @@ -873,13 +886,13 @@ static int ixgbe_get_coalesce(struct net_device *netdev, 
{ struct ixgbe_adapter *adapter = netdev_priv(netdev); - if (adapter->rx_eitr == 0) - ec->rx_coalesce_usecs = 0; + if (adapter->rx_eitr < IXGBE_MIN_ITR_USECS) + ec->rx_coalesce_usecs = adapter->rx_eitr; else ec->rx_coalesce_usecs = 1000000 / adapter->rx_eitr; - if (adapter->tx_eitr == 0) - ec->tx_coalesce_usecs = 0; + if (adapter->tx_eitr < IXGBE_MIN_ITR_USECS) + ec->tx_coalesce_usecs = adapter->tx_eitr; else ec->tx_coalesce_usecs = 1000000 / adapter->tx_eitr; @@ -893,22 +906,26 @@ static int ixgbe_set_coalesce(struct net_device *netdev, struct ixgbe_adapter *adapter = netdev_priv(netdev); if ((ec->rx_coalesce_usecs > IXGBE_MAX_ITR_USECS) || - ((ec->rx_coalesce_usecs > 0) && + ((ec->rx_coalesce_usecs != 0) && + (ec->rx_coalesce_usecs != 1) && + (ec->rx_coalesce_usecs != 3) && (ec->rx_coalesce_usecs < IXGBE_MIN_ITR_USECS))) return -EINVAL; if ((ec->tx_coalesce_usecs > IXGBE_MAX_ITR_USECS) || - ((ec->tx_coalesce_usecs > 0) && + ((ec->tx_coalesce_usecs != 0) && + (ec->tx_coalesce_usecs != 1) && + (ec->tx_coalesce_usecs != 3) && (ec->tx_coalesce_usecs < IXGBE_MIN_ITR_USECS))) return -EINVAL; /* convert to rate of irq's per second */ - if (ec->rx_coalesce_usecs == 0) - adapter->rx_eitr = 0; + if (ec->rx_coalesce_usecs < IXGBE_MIN_ITR_USECS) + adapter->rx_eitr = ec->rx_coalesce_usecs; else adapter->rx_eitr = (1000000 / ec->rx_coalesce_usecs); - if (ec->tx_coalesce_usecs == 0) - adapter->tx_eitr = 0; + if (ec->tx_coalesce_usecs < IXGBE_MIN_ITR_USECS) + adapter->tx_eitr = ec->tx_coalesce_usecs; else adapter->tx_eitr = (1000000 / ec->tx_coalesce_usecs); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index ead49e54f31..cb371a8c24a 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -48,7 +48,7 @@ char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = "Intel(R) 10 Gigabit PCI Express Network Driver"; -#define DRV_VERSION "1.1.18" +#define DRV_VERSION "1.3.18-k2" const char 
ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = "Copyright (c) 1999-2007 Intel Corporation."; @@ -80,6 +80,16 @@ static struct pci_device_id ixgbe_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl); +#ifdef CONFIG_DCA +static int ixgbe_notify_dca(struct notifier_block *, unsigned long event, + void *p); +static struct notifier_block dca_notifier = { + .notifier_call = ixgbe_notify_dca, + .next = NULL, + .priority = 0 +}; +#endif + MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); @@ -220,7 +230,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_adapter *adapter, tx_ring->stats.bytes += tx_buffer_info->length; if (cleaned) { struct sk_buff *skb = tx_buffer_info->skb; -#ifdef NETIF_F_TSO unsigned int segs, bytecount; segs = skb_shinfo(skb)->gso_segs ?: 1; /* multiply data chunks by size of headers */ @@ -228,10 +237,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_adapter *adapter, skb->len; total_tx_packets += segs; total_tx_bytes += bytecount; -#else - total_tx_packets++; - total_tx_bytes += skb->len; -#endif } ixgbe_unmap_and_free_tx_resource(adapter, tx_buffer_info); @@ -261,26 +266,125 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_adapter *adapter, * sees the new next_to_clean. 
*/ smp_mb(); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) && + !test_bit(__IXGBE_DOWN, &adapter->state)) { + netif_wake_subqueue(netdev, tx_ring->queue_index); + adapter->restart_queue++; + } +#else if (netif_queue_stopped(netdev) && !test_bit(__IXGBE_DOWN, &adapter->state)) { netif_wake_queue(netdev); adapter->restart_queue++; } +#endif } if (adapter->detect_tx_hung) if (ixgbe_check_tx_hang(adapter, tx_ring, eop, eop_desc)) +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_stop_subqueue(netdev, tx_ring->queue_index); +#else netif_stop_queue(netdev); +#endif if (total_tx_packets >= tx_ring->work_limit) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, tx_ring->eims_value); + tx_ring->total_bytes += total_tx_bytes; + tx_ring->total_packets += total_tx_packets; adapter->net_stats.tx_bytes += total_tx_bytes; adapter->net_stats.tx_packets += total_tx_packets; cleaned = total_tx_packets ? true : false; return cleaned; } +#ifdef CONFIG_DCA +static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter, + struct ixgbe_ring *rxr) +{ + u32 rxctrl; + int cpu = get_cpu(); + int q = rxr - adapter->rx_ring; + + if (rxr->cpu != cpu) { + rxctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_DCA_RXCTRL(q)); + rxctrl &= ~IXGBE_DCA_RXCTRL_CPUID_MASK; + rxctrl |= dca_get_tag(cpu); + rxctrl |= IXGBE_DCA_RXCTRL_DESC_DCA_EN; + rxctrl |= IXGBE_DCA_RXCTRL_HEAD_DCA_EN; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_RXCTRL(q), rxctrl); + rxr->cpu = cpu; + } + put_cpu(); +} + +static void ixgbe_update_tx_dca(struct ixgbe_adapter *adapter, + struct ixgbe_ring *txr) +{ + u32 txctrl; + int cpu = get_cpu(); + int q = txr - adapter->tx_ring; + + if (txr->cpu != cpu) { + txctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_DCA_TXCTRL(q)); + txctrl &= ~IXGBE_DCA_TXCTRL_CPUID_MASK; + txctrl |= dca_get_tag(cpu); + txctrl |= IXGBE_DCA_TXCTRL_DESC_DCA_EN; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_TXCTRL(q), txctrl); + txr->cpu = cpu; + } + put_cpu(); +} + +static void 
ixgbe_setup_dca(struct ixgbe_adapter *adapter) +{ + int i; + + if (!(adapter->flags & IXGBE_FLAG_DCA_ENABLED)) + return; + + for (i = 0; i < adapter->num_tx_queues; i++) { + adapter->tx_ring[i].cpu = -1; + ixgbe_update_tx_dca(adapter, &adapter->tx_ring[i]); + } + for (i = 0; i < adapter->num_rx_queues; i++) { + adapter->rx_ring[i].cpu = -1; + ixgbe_update_rx_dca(adapter, &adapter->rx_ring[i]); + } +} + +static int __ixgbe_notify_dca(struct device *dev, void *data) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct ixgbe_adapter *adapter = netdev_priv(netdev); + unsigned long event = *(unsigned long *)data; + + switch (event) { + case DCA_PROVIDER_ADD: + adapter->flags |= IXGBE_FLAG_DCA_ENABLED; + /* Always use CB2 mode, difference is masked + * in the CB driver. */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL, 2); + if (dca_add_requester(dev) == 0) { + ixgbe_setup_dca(adapter); + break; + } + /* Fall Through since DCA is disabled. */ + case DCA_PROVIDER_REMOVE: + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { + dca_remove_requester(dev); + adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL, 1); + } + break; + } + + return 0; +} + +#endif /* CONFIG_DCA */ /** * ixgbe_receive_skb - Send a completed packet up the stack * @adapter: board private structure @@ -561,10 +665,13 @@ next_desc: adapter->net_stats.rx_bytes += total_rx_bytes; adapter->net_stats.rx_packets += total_rx_packets; + rx_ring->total_packets += total_rx_packets; + rx_ring->total_bytes += total_rx_bytes; + return cleaned; } -#define IXGBE_MAX_INTR 10 +static int ixgbe_clean_rxonly(struct napi_struct *, int); /** * ixgbe_configure_msix - Configure MSI-X hardware * @adapter: board private structure @@ -574,28 +681,195 @@ next_desc: **/ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) { - int i, vector = 0; + struct ixgbe_q_vector 
*q_vector; + int i, j, q_vectors, v_idx, r_idx; + u32 mask; - for (i = 0; i < adapter->num_tx_queues; i++) { - ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(i), - IXGBE_MSIX_VECTOR(vector)); - writel(EITR_INTS_PER_SEC_TO_REG(adapter->tx_eitr), - adapter->hw.hw_addr + adapter->tx_ring[i].itr_register); - vector++; - } + q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - for (i = 0; i < adapter->num_rx_queues; i++) { - ixgbe_set_ivar(adapter, IXGBE_IVAR_RX_QUEUE(i), - IXGBE_MSIX_VECTOR(vector)); - writel(EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr), - adapter->hw.hw_addr + adapter->rx_ring[i].itr_register); - vector++; + /* Populate the IVAR table and set the ITR values to the + * corresponding register. + */ + for (v_idx = 0; v_idx < q_vectors; v_idx++) { + q_vector = &adapter->q_vector[v_idx]; + /* XXX for_each_bit(...) */ + r_idx = find_first_bit(q_vector->rxr_idx, + adapter->num_rx_queues); + + for (i = 0; i < q_vector->rxr_count; i++) { + j = adapter->rx_ring[r_idx].reg_idx; + ixgbe_set_ivar(adapter, IXGBE_IVAR_RX_QUEUE(j), v_idx); + r_idx = find_next_bit(q_vector->rxr_idx, + adapter->num_rx_queues, + r_idx + 1); + } + r_idx = find_first_bit(q_vector->txr_idx, + adapter->num_tx_queues); + + for (i = 0; i < q_vector->txr_count; i++) { + j = adapter->tx_ring[r_idx].reg_idx; + ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(j), v_idx); + r_idx = find_next_bit(q_vector->txr_idx, + adapter->num_tx_queues, + r_idx + 1); + } + + /* if this is a tx only vector use half the irq (tx) rate */ + if (q_vector->txr_count && !q_vector->rxr_count) + q_vector->eitr = adapter->tx_eitr; + else + /* rx only or mixed */ + q_vector->eitr = adapter->rx_eitr; + + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), + EITR_INTS_PER_SEC_TO_REG(q_vector->eitr)); } - vector = adapter->num_tx_queues + adapter->num_rx_queues; - ixgbe_set_ivar(adapter, IXGBE_IVAR_OTHER_CAUSES_INDEX, - IXGBE_MSIX_VECTOR(vector)); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(vector), 1950); + ixgbe_set_ivar(adapter, 
IXGBE_IVAR_OTHER_CAUSES_INDEX, v_idx); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), 1950); + + /* set up to autoclear timer, lsc, and the vectors */ + mask = IXGBE_EIMS_ENABLE_MASK; + mask &= ~IXGBE_EIMS_OTHER; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask); +} + +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; + +/** + * ixgbe_update_itr - update the dynamic ITR value based on statistics + * @adapter: pointer to adapter + * @eitr: eitr setting (ints per sec) to give last timeslice + * @itr_setting: current latency range (an enum latency_range value) + * @packets: the number of packets during this measurement interval + * @bytes: the number of bytes during this measurement interval + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * this functionality is controlled by the InterruptThrottleRate module + * parameter (see ixgbe_param.c) + **/ +static u8 ixgbe_update_itr(struct ixgbe_adapter *adapter, + u32 eitr, u8 itr_setting, + int packets, int bytes) +{ + unsigned int retval = itr_setting; + u32 timepassed_us; + u64 bytes_perint; + + /* keep the current range when idle or when eitr would divide by 0 */ + if (packets == 0 || eitr == 0 || eitr > 1000000) + goto update_itr_done; + + /* simple throttlerate management + * 0-20MB/s lowest (100000 ints/s) + * 20-100MB/s low (20000 ints/s) + * 100-1249MB/s bulk (8000 ints/s) + */ + /* what was last interrupt timeslice? 
*/ + timepassed_us = 1000000/eitr; + bytes_perint = bytes / timepassed_us; /* bytes/usec */ + + switch (itr_setting) { + case lowest_latency: + if (bytes_perint > adapter->eitr_low) + retval = low_latency; + break; + case low_latency: + if (bytes_perint > adapter->eitr_high) + retval = bulk_latency; + else if (bytes_perint <= adapter->eitr_low) + retval = lowest_latency; + break; + case bulk_latency: + if (bytes_perint <= adapter->eitr_high) + retval = low_latency; + break; + } + +update_itr_done: + return retval; +} + +static void ixgbe_set_itr_msix(struct ixgbe_q_vector *q_vector) +{ + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_hw *hw = &adapter->hw; + u32 new_itr; + u8 current_itr, ret_itr; + int i, r_idx, v_idx = ((void *)q_vector - (void *)(adapter->q_vector)) / + sizeof(struct ixgbe_q_vector); + struct ixgbe_ring *rx_ring, *tx_ring; + + r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues); + for (i = 0; i < q_vector->txr_count; i++) { + tx_ring = &(adapter->tx_ring[r_idx]); + ret_itr = ixgbe_update_itr(adapter, q_vector->eitr, + q_vector->tx_eitr, + tx_ring->total_packets, + tx_ring->total_bytes); + /* if the result for this queue would decrease interrupt + * rate for this vector then use that result */ + q_vector->tx_eitr = ((q_vector->tx_eitr > ret_itr) ? + q_vector->tx_eitr - 1 : ret_itr); + r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues, + r_idx + 1); + } + + r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); + for (i = 0; i < q_vector->rxr_count; i++) { + rx_ring = &(adapter->rx_ring[r_idx]); + ret_itr = ixgbe_update_itr(adapter, q_vector->eitr, + q_vector->rx_eitr, + rx_ring->total_packets, + rx_ring->total_bytes); + /* if the result for this queue would decrease interrupt + * rate for this vector then use that result */ + q_vector->rx_eitr = ((q_vector->rx_eitr > ret_itr) ? 
+ q_vector->rx_eitr - 1 : ret_itr); + r_idx = find_next_bit(q_vector->rxr_idx, adapter->num_rx_queues, + r_idx + 1); + } + + current_itr = max(q_vector->rx_eitr, q_vector->tx_eitr); + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = 100000; + break; + case low_latency: + new_itr = 20000; /* aka hwitr = ~200 */ + break; + case bulk_latency: + default: + new_itr = 8000; + break; + } + + if (new_itr != q_vector->eitr) { + u32 itr_reg; + /* do an exponential smoothing */ + new_itr = ((q_vector->eitr * 90)/100) + ((new_itr * 10)/100); + q_vector->eitr = new_itr; + itr_reg = EITR_INTS_PER_SEC_TO_REG(new_itr); + /* must write high and low 16 bits to reset counter */ + DPRINTK(TX_ERR, DEBUG, "writing eitr(%d): %08X\n", v_idx, + itr_reg); + IXGBE_WRITE_REG(hw, IXGBE_EITR(v_idx), itr_reg | (itr_reg)<<16); + } + + return; } static irqreturn_t ixgbe_msix_lsc(int irq, void *data) @@ -619,153 +895,302 @@ static irqreturn_t ixgbe_msix_lsc(int irq, void *data) static irqreturn_t ixgbe_msix_clean_tx(int irq, void *data) { - struct ixgbe_ring *txr = data; - struct ixgbe_adapter *adapter = txr->adapter; + struct ixgbe_q_vector *q_vector = data; + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *txr; + int i, r_idx; - ixgbe_clean_tx_irq(adapter, txr); + if (!q_vector->txr_count) + return IRQ_HANDLED; + + r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues); + for (i = 0; i < q_vector->txr_count; i++) { + txr = &(adapter->tx_ring[r_idx]); +#ifdef CONFIG_DCA + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) + ixgbe_update_tx_dca(adapter, txr); +#endif + txr->total_bytes = 0; + txr->total_packets = 0; + ixgbe_clean_tx_irq(adapter, txr); + r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues, + r_idx + 1); + } return IRQ_HANDLED; } +/** + * ixgbe_msix_clean_rx - single unshared vector rx clean (all queues) + * @irq: unused + * @data: pointer to our q_vector struct 
for this interrupt vector + **/ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data) { - struct ixgbe_ring *rxr = data; - struct ixgbe_adapter *adapter = rxr->adapter; + struct ixgbe_q_vector *q_vector = data; + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *rxr; + int r_idx; + + r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); + if (!q_vector->rxr_count) + return IRQ_HANDLED; + + rxr = &(adapter->rx_ring[r_idx]); + /* disable interrupts on this vector only */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->v_idx); + rxr->total_bytes = 0; + rxr->total_packets = 0; + netif_rx_schedule(adapter->netdev, &q_vector->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t ixgbe_msix_clean_many(int irq, void *data) +{ + ixgbe_msix_clean_rx(irq, data); + ixgbe_msix_clean_tx(irq, data); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value); - netif_rx_schedule(adapter->netdev, &adapter->napi); return IRQ_HANDLED; } +/** + * ixgbe_clean_rxonly - msix (aka one shot) rx clean routine + * @napi: napi struct with our devices info in it + * @budget: amount of work driver is allowed to do this pass, in packets + * + **/ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget) { - struct ixgbe_adapter *adapter = container_of(napi, - struct ixgbe_adapter, napi); - struct net_device *netdev = adapter->netdev; + struct ixgbe_q_vector *q_vector = + container_of(napi, struct ixgbe_q_vector, napi); + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *rxr; int work_done = 0; - struct ixgbe_ring *rxr = adapter->rx_ring; + long r_idx; - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(netdev)) - goto quit_polling; + r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); + rxr = &(adapter->rx_ring[r_idx]); +#ifdef CONFIG_DCA + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) + ixgbe_update_rx_dca(adapter, rxr); +#endif ixgbe_clean_rx_irq(adapter, rxr, &work_done, 
budget); - /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((work_done < budget) || !netif_running(netdev)) { -quit_polling: - netif_rx_complete(netdev, napi); + /* If all Rx work done, exit the polling mode */ + if (work_done < budget) { + netif_rx_complete(adapter->netdev, napi); + if (adapter->rx_eitr < IXGBE_MIN_ITR_USECS) + ixgbe_set_itr_msix(q_vector); if (!test_bit(__IXGBE_DOWN, &adapter->state)) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, - rxr->eims_value); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, rxr->v_idx); } return work_done; } +static inline void map_vector_to_rxq(struct ixgbe_adapter *a, int v_idx, + int r_idx) +{ + a->q_vector[v_idx].adapter = a; + set_bit(r_idx, a->q_vector[v_idx].rxr_idx); + a->q_vector[v_idx].rxr_count++; + a->rx_ring[r_idx].v_idx = 1 << v_idx; +} + +static inline void map_vector_to_txq(struct ixgbe_adapter *a, int v_idx, + int r_idx) +{ + a->q_vector[v_idx].adapter = a; + set_bit(r_idx, a->q_vector[v_idx].txr_idx); + a->q_vector[v_idx].txr_count++; + a->tx_ring[r_idx].v_idx = 1 << v_idx; +} + /** - * ixgbe_setup_msix - Initialize MSI-X interrupts + * ixgbe_map_rings_to_vectors - Maps descriptor rings to vectors + * @adapter: board private structure to initialize + * @vectors: allotted vector count for descriptor rings * - * ixgbe_setup_msix allocates MSI-X vectors and requests - * interrutps from the kernel. + * This function maps descriptor rings to the queue-specific vectors + * we were allotted through the MSI-X enabling code. Ideally, we'd have + * one vector per ring/queue, but on a constrained vector budget, we + * group the rings as "efficiently" as possible. You would add new + * mapping configurations in here. 
**/ -static int ixgbe_setup_msix(struct ixgbe_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int i, int_vector = 0, err = 0; - int max_msix_count; +static int ixgbe_map_rings_to_vectors(struct ixgbe_adapter *adapter, + int vectors) +{ + int v_start = 0; + int rxr_idx = 0, txr_idx = 0; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int i, j; + int rqpv, tqpv; + int err = 0; + + /* No mapping required if MSI-X is disabled. */ + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) + goto out; - /* +1 for the LSC interrupt */ - max_msix_count = adapter->num_rx_queues + adapter->num_tx_queues + 1; - adapter->msix_entries = kcalloc(max_msix_count, - sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) - return -ENOMEM; + /* + * The ideal configuration... + * We have enough vectors to map one per queue. + */ + if (vectors == adapter->num_rx_queues + adapter->num_tx_queues) { + for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) + map_vector_to_rxq(adapter, v_start, rxr_idx); - for (i = 0; i < max_msix_count; i++) - adapter->msix_entries[i].entry = i; + for (; txr_idx < txr_remaining; v_start++, txr_idx++) + map_vector_to_txq(adapter, v_start, txr_idx); - err = pci_enable_msix(adapter->pdev, adapter->msix_entries, - max_msix_count); - if (err) goto out; + } - for (i = 0; i < adapter->num_tx_queues; i++) { - sprintf(adapter->tx_ring[i].name, "%s-tx%d", netdev->name, i); - err = request_irq(adapter->msix_entries[int_vector].vector, - &ixgbe_msix_clean_tx, - 0, - adapter->tx_ring[i].name, - &(adapter->tx_ring[i])); - if (err) { - DPRINTK(PROBE, ERR, - "request_irq failed for MSIX interrupt " - "Error: %d\n", err); - goto release_irqs; + /* + * If we don't have enough vectors for a 1-to-1 + * mapping, we'll have to group them so there are + * multiple queues per vector. + */ + /* Re-adjusting *qpv takes care of the remainder. 
*/ + for (i = v_start; i < vectors; i++) { + rqpv = DIV_ROUND_UP(rxr_remaining, vectors - i); + for (j = 0; j < rqpv; j++) { + map_vector_to_rxq(adapter, i, rxr_idx); + rxr_idx++; + rxr_remaining--; + } + } + for (i = v_start; i < vectors; i++) { + tqpv = DIV_ROUND_UP(txr_remaining, vectors - i); + for (j = 0; j < tqpv; j++) { + map_vector_to_txq(adapter, i, txr_idx); + txr_idx++; + txr_remaining--; } - adapter->tx_ring[i].eims_value = - (1 << IXGBE_MSIX_VECTOR(int_vector)); - adapter->tx_ring[i].itr_register = IXGBE_EITR(int_vector); - int_vector++; } - for (i = 0; i < adapter->num_rx_queues; i++) { - if (strlen(netdev->name) < (IFNAMSIZ - 5)) - sprintf(adapter->rx_ring[i].name, - "%s-rx%d", netdev->name, i); - else - memcpy(adapter->rx_ring[i].name, - netdev->name, IFNAMSIZ); - err = request_irq(adapter->msix_entries[int_vector].vector, - &ixgbe_msix_clean_rx, 0, - adapter->rx_ring[i].name, - &(adapter->rx_ring[i])); +out: + return err; +} + +/** + * ixgbe_request_msix_irqs - Initialize MSI-X interrupts + * @adapter: board private structure + * + * ixgbe_request_msix_irqs allocates MSI-X vectors and requests + * interrupts from the kernel. + **/ +static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + irqreturn_t (*handler)(int, void *); + int i, vector, q_vectors, err; + + /* Decrement for Other and TCP Timer vectors */ + q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + + /* Map the Tx/Rx rings to the vectors we were allotted. */ + err = ixgbe_map_rings_to_vectors(adapter, q_vectors); + if (err) + goto out; + +#define SET_HANDLER(_v) ((!(_v)->rxr_count) ? &ixgbe_msix_clean_tx : \ + (!(_v)->txr_count) ? &ixgbe_msix_clean_rx : \ + &ixgbe_msix_clean_many) + for (vector = 0; vector < q_vectors; vector++) { + handler = SET_HANDLER(&adapter->q_vector[vector]); + sprintf(adapter->name[vector], "%s:v%d-%s", + netdev->name, vector, + (handler == &ixgbe_msix_clean_rx) ? 
"Rx" : + ((handler == &ixgbe_msix_clean_tx) ? "Tx" : "TxRx")); + err = request_irq(adapter->msix_entries[vector].vector, + handler, 0, adapter->name[vector], + &(adapter->q_vector[vector])); if (err) { DPRINTK(PROBE, ERR, "request_irq failed for MSIX interrupt " "Error: %d\n", err); - goto release_irqs; + goto free_queue_irqs; } - - adapter->rx_ring[i].eims_value = - (1 << IXGBE_MSIX_VECTOR(int_vector)); - adapter->rx_ring[i].itr_register = IXGBE_EITR(int_vector); - int_vector++; } - sprintf(adapter->lsc_name, "%s-lsc", netdev->name); - err = request_irq(adapter->msix_entries[int_vector].vector, - &ixgbe_msix_lsc, 0, adapter->lsc_name, netdev); + sprintf(adapter->name[vector], "%s:lsc", netdev->name); + err = request_irq(adapter->msix_entries[vector].vector, + &ixgbe_msix_lsc, 0, adapter->name[vector], netdev); if (err) { DPRINTK(PROBE, ERR, "request_irq for msix_lsc failed: %d\n", err); - goto release_irqs; + goto free_queue_irqs; } - /* FIXME: implement netif_napi_remove() instead */ - adapter->napi.poll = ixgbe_clean_rxonly; - adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; return 0; -release_irqs: - int_vector--; - for (; int_vector >= adapter->num_tx_queues; int_vector--) - free_irq(adapter->msix_entries[int_vector].vector, - &(adapter->rx_ring[int_vector - - adapter->num_tx_queues])); - - for (; int_vector >= 0; int_vector--) - free_irq(adapter->msix_entries[int_vector].vector, - &(adapter->tx_ring[int_vector])); -out: +free_queue_irqs: + for (i = vector - 1; i >= 0; i--) + free_irq(adapter->msix_entries[--vector].vector, + &(adapter->q_vector[i])); + adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; + pci_disable_msix(adapter->pdev); kfree(adapter->msix_entries); adapter->msix_entries = NULL; - adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; +out: return err; } +static void ixgbe_set_itr(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_q_vector *q_vector = adapter->q_vector; + u8 current_itr; + u32 new_itr = q_vector->eitr; + struct 
ixgbe_ring *rx_ring = &adapter->rx_ring[0]; + struct ixgbe_ring *tx_ring = &adapter->tx_ring[0]; + + q_vector->tx_eitr = ixgbe_update_itr(adapter, new_itr, + q_vector->tx_eitr, + tx_ring->total_packets, + tx_ring->total_bytes); + q_vector->rx_eitr = ixgbe_update_itr(adapter, new_itr, + q_vector->rx_eitr, + rx_ring->total_packets, + rx_ring->total_bytes); + + current_itr = max(q_vector->rx_eitr, q_vector->tx_eitr); + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = 100000; + break; + case low_latency: + new_itr = 20000; /* aka hwitr = ~200 */ + break; + case bulk_latency: + new_itr = 8000; + break; + default: + break; + } + + if (new_itr != q_vector->eitr) { + u32 itr_reg; + /* do an exponential smoothing */ + new_itr = ((q_vector->eitr * 90)/100) + ((new_itr * 10)/100); + q_vector->eitr = new_itr; + itr_reg = EITR_INTS_PER_SEC_TO_REG(new_itr); + /* must write high and low 16 bits to reset counter */ + IXGBE_WRITE_REG(hw, IXGBE_EITR(0), itr_reg | (itr_reg)<<16); + } + + return; +} + +static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter); + /** - * ixgbe_intr - Interrupt Handler + * ixgbe_intr - legacy mode Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure * @pt_regs: CPU registers structure @@ -777,8 +1202,10 @@ static irqreturn_t ixgbe_intr(int irq, void *data) struct ixgbe_hw *hw = &adapter->hw; u32 eicr; - eicr = IXGBE_READ_REG(hw, IXGBE_EICR); + /* for NAPI, using EIAM to auto-mask tx/rx interrupt bits on read + * therefore no explicit interrupt disable is necessary */ + eicr = IXGBE_READ_REG(hw, IXGBE_EICR); if (!eicr) return IRQ_NONE; /* Not our interrupt */ @@ -787,16 +1214,33 @@ static irqreturn_t ixgbe_intr(int irq, void *data) if (!test_bit(__IXGBE_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies); } - if (netif_rx_schedule_prep(netdev, &adapter->napi)) { - /* Disable interrupts and register for 
poll. The flush of the - * posted write is intentionally left out. */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); - __netif_rx_schedule(netdev, &adapter->napi); + + + if (netif_rx_schedule_prep(netdev, &adapter->q_vector[0].napi)) { + adapter->tx_ring[0].total_packets = 0; + adapter->tx_ring[0].total_bytes = 0; + adapter->rx_ring[0].total_packets = 0; + adapter->rx_ring[0].total_bytes = 0; + /* would disable interrupts here but EIAM disabled it */ + __netif_rx_schedule(netdev, &adapter->q_vector[0].napi); } return IRQ_HANDLED; } +static inline void ixgbe_reset_q_vectors(struct ixgbe_adapter *adapter) +{ + int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + + for (i = 0; i < q_vectors; i++) { + struct ixgbe_q_vector *q_vector = &adapter->q_vector[i]; + bitmap_zero(q_vector->rxr_idx, MAX_RX_QUEUES); + bitmap_zero(q_vector->txr_idx, MAX_TX_QUEUES); + q_vector->rxr_count = 0; + q_vector->txr_count = 0; + } +} + /** * ixgbe_request_irq - initialize interrupts * @adapter: board private structure @@ -804,40 +1248,24 @@ static irqreturn_t ixgbe_intr(int irq, void *data) * Attempts to configure interrupts using the best available * capabilities of the hardware and kernel. **/ -static int ixgbe_request_irq(struct ixgbe_adapter *adapter, u32 *num_rx_queues) +static int ixgbe_request_irq(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int flags, err; - irq_handler_t handler = ixgbe_intr; - - flags = IRQF_SHARED; - - err = ixgbe_setup_msix(adapter); - if (!err) - goto request_done; - - /* - * if we can't do MSI-X, fall through and try MSI - * No need to reallocate memory since we're decreasing the number of - * queues. We just won't use the other ones, also it is freed correctly - * on ixgbe_remove. 
- */ - *num_rx_queues = 1; + int err; - /* do MSI */ - err = pci_enable_msi(adapter->pdev); - if (!err) { - adapter->flags |= IXGBE_FLAG_MSI_ENABLED; - flags &= ~IRQF_SHARED; - handler = &ixgbe_intr; + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + err = ixgbe_request_msix_irqs(adapter); + } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { + err = request_irq(adapter->pdev->irq, &ixgbe_intr, 0, + netdev->name, netdev); + } else { + err = request_irq(adapter->pdev->irq, &ixgbe_intr, IRQF_SHARED, + netdev->name, netdev); } - err = request_irq(adapter->pdev->irq, handler, flags, - netdev->name, netdev); if (err) DPRINTK(PROBE, ERR, "request_irq failed, Error %d\n", err); -request_done: return err; } @@ -846,28 +1274,22 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter) struct net_device *netdev = adapter->netdev; if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { - int i; + int i, q_vectors; - for (i = 0; i < adapter->num_tx_queues; i++) - free_irq(adapter->msix_entries[i].vector, - &(adapter->tx_ring[i])); - for (i = 0; i < adapter->num_rx_queues; i++) - free_irq(adapter->msix_entries[i + - adapter->num_tx_queues].vector, - &(adapter->rx_ring[i])); - i = adapter->num_rx_queues + adapter->num_tx_queues; + q_vectors = adapter->num_msix_vectors; + + i = q_vectors - 1; free_irq(adapter->msix_entries[i].vector, netdev); - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; - return; - } - free_irq(adapter->pdev->irq, netdev); - if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { - pci_disable_msi(adapter->pdev); - adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED; + i--; + for (; i >= 0; i--) { + free_irq(adapter->msix_entries[i].vector, + &(adapter->q_vector[i])); + } + + ixgbe_reset_q_vectors(adapter); + } else { + free_irq(adapter->pdev->irq, netdev); } } @@ -879,7 +1301,13 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter) { IXGBE_WRITE_REG(&adapter->hw, 
IXGBE_EIMC, ~0); IXGBE_WRITE_FLUSH(&adapter->hw); - synchronize_irq(adapter->pdev->irq); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + int i; + for (i = 0; i < adapter->num_msix_vectors; i++) + synchronize_irq(adapter->msix_entries[i].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } } /** @@ -888,12 +1316,9 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter) **/ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter) { - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, - (IXGBE_EIMS_ENABLE_MASK & - ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC))); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, - IXGBE_EIMS_ENABLE_MASK); + u32 mask; + mask = IXGBE_EIMS_ENABLE_MASK; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask); IXGBE_WRITE_FLUSH(&adapter->hw); } @@ -903,20 +1328,18 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter) **/ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter) { - int i; struct ixgbe_hw *hw = &adapter->hw; - if (adapter->rx_eitr) - IXGBE_WRITE_REG(hw, IXGBE_EITR(0), - EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr)); - - /* for re-triggering the interrupt in non-NAPI mode */ - adapter->rx_ring[0].eims_value = (1 << IXGBE_MSIX_VECTOR(0)); - adapter->tx_ring[0].eims_value = (1 << IXGBE_MSIX_VECTOR(0)); + IXGBE_WRITE_REG(hw, IXGBE_EITR(0), + EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr)); ixgbe_set_ivar(adapter, IXGBE_IVAR_RX_QUEUE(0), 0); - for (i = 0; i < adapter->num_tx_queues; i++) - ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(i), i); + ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(0), 0); + + map_vector_to_rxq(adapter, 0, 0); + map_vector_to_txq(adapter, 0, 0); + + DPRINTK(HW, INFO, "Legacy interrupt IVAR setup done\n"); } /** @@ -929,23 +1352,29 @@ static void ixgbe_configure_tx(struct ixgbe_adapter *adapter) { u64 tdba; struct ixgbe_hw *hw = &adapter->hw; - u32 i, tdlen; + u32 i, j, tdlen, txctrl; /* Setup the HW Tx Head and Tail descriptor 
pointers */ for (i = 0; i < adapter->num_tx_queues; i++) { + j = adapter->tx_ring[i].reg_idx; tdba = adapter->tx_ring[i].dma; tdlen = adapter->tx_ring[i].count * - sizeof(union ixgbe_adv_tx_desc); - IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (tdba & DMA_32BIT_MASK)); - IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i), tdlen); - IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0); - adapter->tx_ring[i].head = IXGBE_TDH(i); - adapter->tx_ring[i].tail = IXGBE_TDT(i); + sizeof(union ixgbe_adv_tx_desc); + IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), + (tdba & DMA_32BIT_MASK)); + IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), tdlen); + IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); + IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); + adapter->tx_ring[i].head = IXGBE_TDH(j); + adapter->tx_ring[i].tail = IXGBE_TDT(j); + /* Disable Tx Head Writeback RO bit, since this hoses + * bookkeeping if things aren't delivered in order. + */ + txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); + txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl); } - - IXGBE_WRITE_REG(hw, IXGBE_TIPG, IXGBE_TIPG_FIBER_DEFAULT); } #define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ @@ -964,13 +1393,12 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + int i, j; u32 rdlen, rxctrl, rxcsum; u32 random[10]; - u32 reta, mrqc; - int i; u32 fctrl, hlreg0; - u32 srrctl; u32 pages; + u32 reta = 0, mrqc, srrctl; /* Decide whether to use packet split mode or not */ if (netdev->mtu > ETH_DATA_LEN) @@ -990,6 +1418,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; + fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ IXGBE_WRITE_REG(&adapter->hw, 
IXGBE_FCTRL, fctrl); hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); @@ -1041,37 +1470,23 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) adapter->rx_ring[i].tail = IXGBE_RDT(i); } - if (adapter->num_rx_queues > 1) { - /* Random 40bytes used as random key in RSS hash function */ - get_random_bytes(&random[0], 40); - - switch (adapter->num_rx_queues) { - case 8: - case 4: - /* Bits [3:0] in each byte refers the Rx queue no */ - reta = 0x00010203; - break; - case 2: - reta = 0x00010001; - break; - default: - reta = 0x00000000; - break; - } - + if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { /* Fill out redirection table */ - for (i = 0; i < 32; i++) { - IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RETA(0), i, reta); - if (adapter->num_rx_queues > 4) { - i++; - IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RETA(0), i, - 0x04050607); - } + for (i = 0, j = 0; i < 128; i++, j++) { + if (j == adapter->ring_feature[RING_F_RSS].indices) + j = 0; + /* reta = 4-byte sliding window of + * 0x00..(indices-1)(indices-1)00..etc. */ + reta = (reta << 8) | (j * 0x11); + if ((i & 3) == 3) + IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); } /* Fill out hash function seeds */ + /* XXX use a random constant here to glue certain flows */ + get_random_bytes(&random[0], 40); for (i = 0; i < 10; i++) - IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RSSRK(0), i, random[i]); + IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]); mrqc = IXGBE_MRQC_RSSEN /* Perform hash on these packet types */ @@ -1085,26 +1500,23 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) | IXGBE_MRQC_RSS_FIELD_IPV6_UDP | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + } - /* Multiqueue and packet checksumming are mutually exclusive. 
*/ - rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + + if (adapter->flags & IXGBE_FLAG_RSS_ENABLED || + adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED) { + /* Disable indicating checksum in descriptor, enables + * RSS hash */ rxcsum |= IXGBE_RXCSUM_PCSD; - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); - } else { - /* Enable Receive Checksum Offload for TCP and UDP */ - rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); - if (adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED) { - /* Enable IPv4 payload checksum for UDP fragments - * Must be used in conjunction with packet-split. */ - rxcsum |= IXGBE_RXCSUM_IPPCSE; - } else { - /* don't need to clear IPPCSE as it defaults to 0 */ - } - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); } - /* Enable Receives */ - IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl); - rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + if (!(rxcsum & IXGBE_RXCSUM_PCSD)) { + /* Enable IPv4 payload checksum for UDP fragments + * if PCSD is not set */ + rxcsum |= IXGBE_RXCSUM_IPPCSE; + } + + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); } static void ixgbe_vlan_rx_register(struct net_device *netdev, @@ -1224,6 +1636,42 @@ static void ixgbe_set_multi(struct net_device *netdev) } +static void ixgbe_napi_enable_all(struct ixgbe_adapter *adapter) +{ + int q_idx; + struct ixgbe_q_vector *q_vector; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + + /* legacy and MSI only use one vector */ + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) + q_vectors = 1; + + for (q_idx = 0; q_idx < q_vectors; q_idx++) { + q_vector = &adapter->q_vector[q_idx]; + if (!q_vector->rxr_count) + continue; + napi_enable(&q_vector->napi); + } +} + +static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) +{ + int q_idx; + struct ixgbe_q_vector *q_vector; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + + /* legacy and MSI only use one vector */ + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) + q_vectors = 1; + + for (q_idx = 0; q_idx 
< q_vectors; q_idx++) { + q_vector = &adapter->q_vector[q_idx]; + if (!q_vector->rxr_count) + continue; + napi_disable(&q_vector->napi); + } +} + static void ixgbe_configure(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1243,30 +1691,35 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) static int ixgbe_up_complete(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int i; - u32 gpie = 0; struct ixgbe_hw *hw = &adapter->hw; - u32 txdctl, rxdctl, mhadd; + int i, j = 0; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + u32 txdctl, rxdctl, mhadd; + u32 gpie; ixgbe_get_hw_control(adapter); - if (adapter->flags & (IXGBE_FLAG_MSIX_ENABLED | - IXGBE_FLAG_MSI_ENABLED)) { + if ((adapter->flags & IXGBE_FLAG_MSIX_ENABLED) || + (adapter->flags & IXGBE_FLAG_MSI_ENABLED)) { if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { gpie = (IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD); } else { /* MSI only */ - gpie = (IXGBE_GPIE_EIAME | - IXGBE_GPIE_PBA_SUPPORT); + gpie = 0; } - IXGBE_WRITE_REG(&adapter->hw, IXGBE_GPIE, gpie); - gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE); + /* XXX: to interrupt immediately for EICS writes, enable this */ + /* gpie |= IXGBE_GPIE_EIMEN; */ + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); } - mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { + /* legacy interrupts, use EIAM to auto-mask when reading EICR, + * specifically only auto mask tx and rx interrupts */ + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); + } + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) { mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= max_frame << IXGBE_MHADD_MFS_SHIFT; @@ -1275,15 +1728,21 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) } for (i = 0; i < adapter->num_tx_queues; i++) { - txdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_TXDCTL(i)); + j = 
adapter->tx_ring[i].reg_idx; + txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(j)); txdctl |= IXGBE_TXDCTL_ENABLE; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TXDCTL(i), txdctl); + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(j), txdctl); } for (i = 0; i < adapter->num_rx_queues; i++) { - rxdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_RXDCTL(i)); + j = adapter->rx_ring[i].reg_idx; + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)); + /* enable PTHRESH=32 descriptors (half the internal cache) + * and HTHRESH=0 descriptors (to minimize latency on fetch), + * this also removes a pesky rx_no_buffer_count increment */ + rxdctl |= 0x0020; rxdctl |= IXGBE_RXDCTL_ENABLE; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_RXDCTL(i), rxdctl); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), rxdctl); } /* enable all receives */ rxdctl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); @@ -1296,7 +1755,11 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) ixgbe_configure_msi_and_legacy(adapter); clear_bit(__IXGBE_DOWN, &adapter->state); - napi_enable(&adapter->napi); + ixgbe_napi_enable_all(adapter); + + /* clear any pending interrupts, may auto mask */ + IXGBE_READ_REG(hw, IXGBE_EICR); + ixgbe_irq_enable(adapter); /* bring the link up in the watchdog, this could race with our first @@ -1338,7 +1801,7 @@ static int ixgbe_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgbe_adapter *adapter = netdev_priv(netdev); - u32 err, num_rx_queues = adapter->num_rx_queues; + u32 err; pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); @@ -1354,7 +1817,7 @@ static int ixgbe_resume(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3cold, 0); if (netif_running(netdev)) { - err = ixgbe_request_irq(adapter, &num_rx_queues); + err = ixgbe_request_irq(adapter); if (err) return err; } @@ -1454,27 +1917,27 @@ static void ixgbe_clean_tx_ring(struct ixgbe_adapter *adapter, } /** - * ixgbe_clean_all_tx_rings - Free Tx Buffers for all queues + * ixgbe_clean_all_rx_rings - Free Rx Buffers for all queues * 
@adapter: board private structure **/ -static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter) +static void ixgbe_clean_all_rx_rings(struct ixgbe_adapter *adapter) { int i; - for (i = 0; i < adapter->num_tx_queues; i++) - ixgbe_clean_tx_ring(adapter, &adapter->tx_ring[i]); + for (i = 0; i < adapter->num_rx_queues; i++) + ixgbe_clean_rx_ring(adapter, &adapter->rx_ring[i]); } /** - * ixgbe_clean_all_rx_rings - Free Rx Buffers for all queues + * ixgbe_clean_all_tx_rings - Free Tx Buffers for all queues * @adapter: board private structure **/ -static void ixgbe_clean_all_rx_rings(struct ixgbe_adapter *adapter) +static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter) { int i; - for (i = 0; i < adapter->num_rx_queues; i++) - ixgbe_clean_rx_ring(adapter, &adapter->rx_ring[i]); + for (i = 0; i < adapter->num_tx_queues; i++) + ixgbe_clean_tx_ring(adapter, &adapter->tx_ring[i]); } void ixgbe_down(struct ixgbe_adapter *adapter) @@ -1498,10 +1961,9 @@ void ixgbe_down(struct ixgbe_adapter *adapter) IXGBE_WRITE_FLUSH(&adapter->hw); msleep(10); - napi_disable(&adapter->napi); - ixgbe_irq_disable(adapter); + ixgbe_napi_disable_all(adapter); del_timer_sync(&adapter->watchdog_timer); netif_carrier_off(netdev); @@ -1552,27 +2014,37 @@ static void ixgbe_shutdown(struct pci_dev *pdev) } /** - * ixgbe_clean - NAPI Rx polling callback - * @adapter: board private structure + * ixgbe_poll - NAPI Rx polling callback + * @napi: structure for representing this polling device + * @budget: how many packets driver is allowed to clean + * + * This function is used for legacy and MSI, NAPI mode **/ -static int ixgbe_clean(struct napi_struct *napi, int budget) +static int ixgbe_poll(struct napi_struct *napi, int budget) { - struct ixgbe_adapter *adapter = container_of(napi, - struct ixgbe_adapter, napi); - struct net_device *netdev = adapter->netdev; + struct ixgbe_q_vector *q_vector = container_of(napi, + struct ixgbe_q_vector, napi); + struct ixgbe_adapter *adapter = 
q_vector->adapter; int tx_cleaned = 0, work_done = 0; - /* In non-MSIX case, there is no multi-Tx/Rx queue */ +#ifdef CONFIG_DCA + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { + ixgbe_update_tx_dca(adapter, adapter->tx_ring); + ixgbe_update_rx_dca(adapter, adapter->rx_ring); + } +#endif + tx_cleaned = ixgbe_clean_tx_irq(adapter, adapter->tx_ring); - ixgbe_clean_rx_irq(adapter, &adapter->rx_ring[0], &work_done, - budget); + ixgbe_clean_rx_irq(adapter, adapter->rx_ring, &work_done, budget); if (tx_cleaned) work_done = budget; /* If budget not fully consumed, exit the polling mode */ if (work_done < budget) { - netif_rx_complete(netdev, napi); + netif_rx_complete(adapter->netdev, napi); + if (adapter->rx_eitr < IXGBE_MIN_ITR_USECS) + ixgbe_set_itr(adapter); if (!test_bit(__IXGBE_DOWN, &adapter->state)) ixgbe_irq_enable(adapter); } @@ -1602,6 +2074,136 @@ static void ixgbe_reset_task(struct work_struct *work) ixgbe_reinit_locked(adapter); } +static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter, + int vectors) +{ + int err, vector_threshold; + + /* We'll want at least 3 (vector_threshold): + * 1) TxQ[0] Cleanup + * 2) RxQ[0] Cleanup + * 3) Other (Link Status Change, etc.) + * 4) TCP Timer (optional) + */ + vector_threshold = MIN_MSIX_COUNT; + + /* The more we get, the more we will assign to Tx/Rx Cleanup + * for the separate queues...where Rx Cleanup >= Tx Cleanup. + * Right now, we simply care about how many we'll get; we'll + * set them up later while requesting irq's. + */ + while (vectors >= vector_threshold) { + err = pci_enable_msix(adapter->pdev, adapter->msix_entries, + vectors); + if (!err) /* Success in acquiring all requested vectors. */ + break; + else if (err < 0) + vectors = 0; /* Nasty failure, quit now */ + else /* err == number of vectors we should try again with */ + vectors = err; + } + + if (vectors < vector_threshold) { + /* Can't allocate enough MSI-X interrupts? Oh well. 
+ * This just means we'll go with either a single MSI + * vector or fall back to legacy interrupts. + */ + DPRINTK(HW, DEBUG, "Unable to allocate MSI-X interrupts\n"); + adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + adapter->num_tx_queues = 1; + adapter->num_rx_queues = 1; + } else { + adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; /* Woot! */ + adapter->num_msix_vectors = vectors; + } +} + +static void __devinit ixgbe_set_num_queues(struct ixgbe_adapter *adapter) +{ + int nrq, ntq; + int feature_mask = 0, rss_i, rss_m; + + /* Number of supported queues */ + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + rss_i = adapter->ring_feature[RING_F_RSS].indices; + rss_m = 0; + feature_mask |= IXGBE_FLAG_RSS_ENABLED; + + switch (adapter->flags & feature_mask) { + case (IXGBE_FLAG_RSS_ENABLED): + rss_m = 0xF; + nrq = rss_i; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + ntq = rss_i; +#else + ntq = 1; +#endif + break; + case 0: + default: + rss_i = 0; + rss_m = 0; + nrq = 1; + ntq = 1; + break; + } + + adapter->ring_feature[RING_F_RSS].indices = rss_i; + adapter->ring_feature[RING_F_RSS].mask = rss_m; + break; + default: + nrq = 1; + ntq = 1; + break; + } + + adapter->num_rx_queues = nrq; + adapter->num_tx_queues = ntq; +} + +/** + * ixgbe_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize + * + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. + **/ +static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) +{ + /* TODO: Remove all uses of the indices in the cases where multiple + * features are OR'd together, if the feature set makes sense. 
+ */ + int feature_mask = 0, rss_i; + int i, txr_idx, rxr_idx; + + /* Number of supported queues */ + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + rss_i = adapter->ring_feature[RING_F_RSS].indices; + txr_idx = 0; + rxr_idx = 0; + feature_mask |= IXGBE_FLAG_RSS_ENABLED; + switch (adapter->flags & feature_mask) { + case (IXGBE_FLAG_RSS_ENABLED): + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i].reg_idx = i; + for (i = 0; i < adapter->num_tx_queues; i++) + adapter->tx_ring[i].reg_idx = i; + break; + case 0: + default: + break; + } + break; + default: + break; + } +} + /** * ixgbe_alloc_queues - Allocate memory for all rings * @adapter: board private structure to initialize @@ -1617,25 +2219,167 @@ static int __devinit ixgbe_alloc_queues(struct ixgbe_adapter *adapter) adapter->tx_ring = kcalloc(adapter->num_tx_queues, sizeof(struct ixgbe_ring), GFP_KERNEL); if (!adapter->tx_ring) - return -ENOMEM; - - for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i].count = IXGBE_DEFAULT_TXD; + goto err_tx_ring_allocation; adapter->rx_ring = kcalloc(adapter->num_rx_queues, sizeof(struct ixgbe_ring), GFP_KERNEL); - if (!adapter->rx_ring) { - kfree(adapter->tx_ring); - return -ENOMEM; - } + if (!adapter->rx_ring) + goto err_rx_ring_allocation; + for (i = 0; i < adapter->num_tx_queues; i++) { + adapter->tx_ring[i].count = IXGBE_DEFAULT_TXD; + adapter->tx_ring[i].queue_index = i; + } for (i = 0; i < adapter->num_rx_queues; i++) { - adapter->rx_ring[i].adapter = adapter; - adapter->rx_ring[i].itr_register = IXGBE_EITR(i); adapter->rx_ring[i].count = IXGBE_DEFAULT_RXD; + adapter->rx_ring[i].queue_index = i; + } + + ixgbe_cache_ring_register(adapter); + + return 0; + +err_rx_ring_allocation: + kfree(adapter->tx_ring); +err_tx_ring_allocation: + return -ENOMEM; +} + +/** + * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported + * @adapter: board private structure to initialize + * + * Attempt to configure the interrupts using the 
best available + * capabilities of the hardware and the kernel. + **/ +static int __devinit ixgbe_set_interrupt_capability(struct ixgbe_adapter + *adapter) +{ + int err = 0; + int vector, v_budget; + + /* + * It's easy to be greedy for MSI-X vectors, but it really + * doesn't do us much good if we have a lot more vectors + * than CPU's. So let's be conservative and only ask for + * (roughly) twice the number of vectors as there are CPU's. + */ + v_budget = min(adapter->num_rx_queues + adapter->num_tx_queues, + (int)(num_online_cpus() * 2)) + NON_Q_VECTORS; + + /* + * At the same time, hardware can only support a maximum of + * MAX_MSIX_COUNT vectors. With features such as RSS and VMDq, + * we can easily reach upwards of 64 Rx descriptor queues and + * 32 Tx queues. Thus, we cap it off in those rare cases where + * the cpu count also exceeds our vector limit. + */ + v_budget = min(v_budget, MAX_MSIX_COUNT); + + /* A failure in MSI-X entry allocation isn't fatal, but it does + * mean we disable MSI-X capabilities of the adapter. */ + adapter->msix_entries = kcalloc(v_budget, + sizeof(struct msix_entry), GFP_KERNEL); + if (!adapter->msix_entries) { + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + ixgbe_set_num_queues(adapter); + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); + err = ixgbe_alloc_queues(adapter); + if (err) { + DPRINTK(PROBE, ERR, "Unable to allocate memory " + "for queues\n"); + goto out; + } + + goto try_msi; + } + + for (vector = 0; vector < v_budget; vector++) + adapter->msix_entries[vector].entry = vector; + + ixgbe_acquire_msix_vectors(adapter, v_budget); + + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + goto out; + +try_msi: + err = pci_enable_msi(adapter->pdev); + if (!err) { + adapter->flags |= IXGBE_FLAG_MSI_ENABLED; + } else { + DPRINTK(HW, DEBUG, "Unable to allocate MSI interrupt, " + "falling back to legacy. 
Error: %d\n", err); + /* reset err */ + err = 0; + } + +out: +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + /* Notify the stack of the (possibly) reduced Tx Queue count. */ + adapter->netdev->egress_subqueue_count = adapter->num_tx_queues; +#endif + + return err; +} + +static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) +{ + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { + adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED; + pci_disable_msi(adapter->pdev); } + return; +} + +/** + * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme + * @adapter: board private structure to initialize + * + * We determine which interrupt scheme to use based on... + * - Kernel support (MSI, MSI-X) + * - which can be user-defined (via MODULE_PARAM) + * - Hardware queue count (num_*_queues) + * - defined by miscellaneous hardware support/features (RSS, etc.) + **/ +static int __devinit ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) +{ + int err; + + /* Number of supported queues */ + ixgbe_set_num_queues(adapter); + + err = ixgbe_alloc_queues(adapter); + if (err) { + DPRINTK(PROBE, ERR, "Unable to allocate memory for queues\n"); + goto err_alloc_queues; + } + + err = ixgbe_set_interrupt_capability(adapter); + if (err) { + DPRINTK(PROBE, ERR, "Unable to setup interrupt capabilities\n"); + goto err_set_interrupt; + } + + DPRINTK(DRV, INFO, "Multiqueue %s: Rx Queue count = %u, " + "Tx Queue count = %u\n", + (adapter->num_rx_queues > 1) ? 
"Enabled" : + "Disabled", adapter->num_rx_queues, adapter->num_tx_queues); + + set_bit(__IXGBE_DOWN, &adapter->state); return 0; + +err_set_interrupt: + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); +err_alloc_queues: + return err; } /** @@ -1650,11 +2394,22 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; + unsigned int rss; + + /* Set capability flags */ + rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus()); + adapter->ring_feature[RING_F_RSS].indices = rss; + adapter->flags |= IXGBE_FLAG_RSS_ENABLED; + + /* Enable Dynamic interrupt throttling by default */ + adapter->rx_eitr = 1; + adapter->tx_eitr = 1; /* default flow control settings */ hw->fc.original_type = ixgbe_fc_full; hw->fc.type = ixgbe_fc_full; + /* select 10G link by default */ hw->mac.link_mode_select = IXGBE_AUTOC_LMS_10G_LINK_NO_AN; if (hw->mac.ops.reset(hw)) { dev_err(&pdev->dev, "HW Init failed\n"); @@ -1672,16 +2427,9 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) return -EIO; } - /* Set the default values */ - adapter->num_rx_queues = IXGBE_DEFAULT_RXQ; - adapter->num_tx_queues = 1; + /* enable rx csum by default */ adapter->flags |= IXGBE_FLAG_RX_CSUM_ENABLED; - if (ixgbe_alloc_queues(adapter)) { - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); - return -ENOMEM; - } - set_bit(__IXGBE_DOWN, &adapter->state); return 0; @@ -1721,7 +2469,6 @@ int ixgbe_setup_tx_resources(struct ixgbe_adapter *adapter, return -ENOMEM; } - txdr->adapter = adapter; txdr->next_to_use = 0; txdr->next_to_clean = 0; txdr->work_limit = txdr->count; @@ -1740,7 +2487,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, struct ixgbe_ring *rxdr) { struct pci_dev *pdev = adapter->pdev; - int size, desc_len; + int size; size = sizeof(struct ixgbe_rx_buffer) * rxdr->count; rxdr->rx_buffer_info = vmalloc(size); @@ -1751,10 +2498,8 @@ int ixgbe_setup_rx_resources(struct 
ixgbe_adapter *adapter, } memset(rxdr->rx_buffer_info, 0, size); - desc_len = sizeof(union ixgbe_adv_rx_desc); - /* Round up to nearest 4K */ - rxdr->size = rxdr->count * desc_len; + rxdr->size = rxdr->count * sizeof(union ixgbe_adv_rx_desc); rxdr->size = ALIGN(rxdr->size, 4096); rxdr->desc = pci_alloc_consistent(pdev, rxdr->size, &rxdr->dma); @@ -1768,7 +2513,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, rxdr->next_to_clean = 0; rxdr->next_to_use = 0; - rxdr->adapter = adapter; return 0; } @@ -1846,8 +2590,7 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) } /** - * ixgbe_setup_all_tx_resources - wrapper to allocate Tx resources - * (Descriptors) for all queues + * ixgbe_setup_all_tx_resources - allocate all queues Tx resources * @adapter: board private structure * * If this function returns with an error, then it's possible one or @@ -1873,8 +2616,7 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) } /** - * ixgbe_setup_all_rx_resources - wrapper to allocate Rx resources - * (Descriptors) for all queues + * ixgbe_setup_all_rx_resources - allocate all queues Rx resources * @adapter: board private structure * * If this function returns with an error, then it's possible one or @@ -1916,6 +2658,9 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) return -EINVAL; + DPRINTK(PROBE, INFO, "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; if (netif_running(netdev)) @@ -1940,19 +2685,16 @@ static int ixgbe_open(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); int err; - u32 num_rx_queues = adapter->num_rx_queues; -try_intr_reinit: + /* disallow open during test */ + if (test_bit(__IXGBE_TESTING, &adapter->state)) + return -EBUSY; + /* allocate transmit descriptors */ err = ixgbe_setup_all_tx_resources(adapter); if (err) goto 
err_setup_tx; - if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { - num_rx_queues = 1; - adapter->num_rx_queues = num_rx_queues; - } - /* allocate receive descriptors */ err = ixgbe_setup_all_rx_resources(adapter); if (err) @@ -1960,31 +2702,10 @@ try_intr_reinit: ixgbe_configure(adapter); - err = ixgbe_request_irq(adapter, &num_rx_queues); + err = ixgbe_request_irq(adapter); if (err) goto err_req_irq; - /* ixgbe_request might have reduced num_rx_queues */ - if (num_rx_queues < adapter->num_rx_queues) { - /* We didn't get MSI-X, so we need to release everything, - * set our Rx queue count to num_rx_queues, and redo the - * whole init process. - */ - ixgbe_free_irq(adapter); - if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { - pci_disable_msi(adapter->pdev); - adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED; - } - ixgbe_free_all_rx_resources(adapter); - ixgbe_free_all_tx_resources(adapter); - adapter->num_rx_queues = num_rx_queues; - - /* Reset the hardware, and start over. */ - ixgbe_reset(adapter); - - goto try_intr_reinit; - } - err = ixgbe_up_complete(adapter); if (err) goto err_up; @@ -2120,6 +2841,9 @@ static void ixgbe_watchdog(unsigned long data) struct net_device *netdev = adapter->netdev; bool link_up; u32 link_speed = 0; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + int i; +#endif adapter->hw.mac.ops.check_link(&adapter->hw, &(link_speed), &link_up); @@ -2134,13 +2858,17 @@ static void ixgbe_watchdog(unsigned long data) (link_speed == IXGBE_LINK_SPEED_10GB_FULL ? "10 Gbps" : (link_speed == IXGBE_LINK_SPEED_1GB_FULL ? - "1 Gpbs" : "unknown speed")), + "1 Gbps" : "unknown speed")), ((FLOW_RX && FLOW_TX) ? "RX/TX" : (FLOW_RX ? "RX" : (FLOW_TX ? 
"TX" : "None")))); netif_carrier_on(netdev); netif_wake_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_wake_subqueue(netdev, i); +#endif } else { /* Force detection of hung controller */ adapter->detect_tx_hung = true; @@ -2155,10 +2883,23 @@ static void ixgbe_watchdog(unsigned long data) ixgbe_update_stats(adapter); - /* Reset the timer */ - if (!test_bit(__IXGBE_DOWN, &adapter->state)) + if (!test_bit(__IXGBE_DOWN, &adapter->state)) { + /* Cause software interrupt to ensure rx rings are cleaned */ + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + u32 eics = + (1 << (adapter->num_msix_vectors - NON_Q_VECTORS)) - 1; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, eics); + } else { + /* for legacy and MSI interrupts don't set any bits that + * are enabled for EIAM, because this operation would + * set *both* EIMS and EICS for any bit in EIAM */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, + (IXGBE_EICS_TCP_TIMER | IXGBE_EICS_OTHER)); + } + /* Reset the timer */ mod_timer(&adapter->watchdog_timer, round_jiffies(jiffies + 2 * HZ)); + } } static int ixgbe_tso(struct ixgbe_adapter *adapter, @@ -2171,7 +2912,6 @@ static int ixgbe_tso(struct ixgbe_adapter *adapter, struct ixgbe_tx_buffer *tx_buffer_info; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, l4len; - *hdr_len = 0; if (skb_is_gso(skb)) { if (skb_header_cloned(skb)) { @@ -2278,11 +3018,29 @@ static bool ixgbe_tx_csum(struct ixgbe_adapter *adapter, IXGBE_ADVTXD_DTYP_CTXT); if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (skb->protocol == htons(ETH_P_IP)) + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + type_tucmd_mlhl |= + IXGBE_ADVTXD_TUCMD_L4T_TCP; + break; + + case __constant_htons(ETH_P_IPV6): + /* XXX what about other V6 headers?? 
*/ + if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) + type_tucmd_mlhl |= + IXGBE_ADVTXD_TUCMD_L4T_TCP; + break; - if (skb->sk->sk_protocol == IPPROTO_TCP) - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; + default: + if (unlikely(net_ratelimit())) { + DPRINTK(PROBE, WARNING, + "partial checksum but proto=%x!\n", + skb->protocol); + } + break; + } } context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd_mlhl); @@ -2437,7 +3195,11 @@ static int __ixgbe_maybe_stop_tx(struct net_device *netdev, { struct ixgbe_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_stop_subqueue(netdev, tx_ring->queue_index); +#else netif_stop_queue(netdev); +#endif /* Herbert's original patch had: * smp_mb__after_netif_stop_queue(); * but since that doesn't exist yet, just open code it. */ @@ -2449,7 +3211,11 @@ static int __ixgbe_maybe_stop_tx(struct net_device *netdev, return -EBUSY; /* A reprieve! - use start_queue because it doesn't call schedule */ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_wake_subqueue(netdev, tx_ring->queue_index); +#else netif_wake_queue(netdev); +#endif ++adapter->restart_queue; return 0; } @@ -2470,15 +3236,18 @@ static int ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) unsigned int len = skb->len; unsigned int first; unsigned int tx_flags = 0; - u8 hdr_len; - int tso; + u8 hdr_len = 0; + int r_idx = 0, tso; unsigned int mss = 0; int count = 0; unsigned int f; unsigned int nr_frags = skb_shinfo(skb)->nr_frags; len -= skb->data_len; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + r_idx = (adapter->num_tx_queues - 1) & skb->queue_mapping; +#endif + tx_ring = &adapter->tx_ring[r_idx]; - tx_ring = adapter->tx_ring; if (skb->len <= 0) { dev_kfree_skb(skb); @@ -2587,6 +3356,31 @@ static void ixgbe_netpoll(struct net_device *netdev) #endif /** + * ixgbe_napi_add_all - prep napi structs for use + * @adapter: private struct + * helper function to napi_add each possible q_vector->napi + */ +static void ixgbe_napi_add_all(struct 
ixgbe_adapter *adapter) +{ + int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + int (*poll)(struct napi_struct *, int); + + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + poll = &ixgbe_clean_rxonly; + } else { + poll = &ixgbe_poll; + /* only one q_vector for legacy modes */ + q_vectors = 1; + } + + for (i = 0; i < q_vectors; i++) { + struct ixgbe_q_vector *q_vector = &adapter->q_vector[i]; + netif_napi_add(adapter->netdev, &q_vector->napi, + (*poll), 64); + } +} + +/** * ixgbe_probe - Device Initialization Routine * @pdev: PCI device information struct * @ent: entry in ixgbe_pci_tbl @@ -2638,7 +3432,11 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, pci_set_master(pdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), MAX_TX_QUEUES); +#else netdev = alloc_etherdev(sizeof(struct ixgbe_adapter)); +#endif if (!netdev) { err = -ENOMEM; goto err_alloc_etherdev; @@ -2679,7 +3477,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, ixgbe_set_ethtool_ops(netdev); netdev->tx_timeout = &ixgbe_tx_timeout; netdev->watchdog_timeo = 5 * HZ; - netif_napi_add(netdev, &adapter->napi, ixgbe_clean, 64); netdev->vlan_rx_register = ixgbe_vlan_rx_register; netdev->vlan_rx_add_vid = ixgbe_vlan_rx_add_vid; netdev->vlan_rx_kill_vid = ixgbe_vlan_rx_kill_vid; @@ -2702,6 +3499,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, /* Setup hw api */ memcpy(&hw->mac.ops, ii->mac_ops, sizeof(hw->mac.ops)); + hw->mac.type = ii->mac; err = ii->get_invariants(hw); if (err) @@ -2724,6 +3522,9 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netdev->features |= NETIF_F_MULTI_QUEUE; +#endif /* make sure the EEPROM is good */ if (ixgbe_validate_eeprom_checksum(hw, NULL) < 0) { @@ -2753,9 +3554,9 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, hw->fc.low_water = IXGBE_DEFAULT_FCRTL; hw->fc.pause_time = 
IXGBE_DEFAULT_FCPAUSE; - /* Interrupt Throttle Rate */ - adapter->rx_eitr = (1000000 / IXGBE_DEFAULT_ITR_RX_USECS); - adapter->tx_eitr = (1000000 / IXGBE_DEFAULT_ITR_TX_USECS); + err = ixgbe_init_interrupt_scheme(adapter); + if (err) + goto err_sw_init; /* print bus type/speed/width info */ pci_read_config_word(pdev, IXGBE_PCI_LINK_STATUS, &link_status); @@ -2778,17 +3579,40 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, hw->mac.type, hw->phy.type, (part_num >> 8), (part_num & 0xff)); + if (link_width <= IXGBE_PCI_LINK_WIDTH_4) { + dev_warn(&pdev->dev, "PCI-Express bandwidth available for " + "this card is not sufficient for optimal " + "performance.\n"); + dev_warn(&pdev->dev, "For optimal performance a x8 " + "PCI-Express slot is required.\n"); + } + /* reset the hardware with the new settings */ ixgbe_start_hw(hw); netif_carrier_off(netdev); netif_stop_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +#endif + + ixgbe_napi_add_all(adapter); strcpy(netdev->name, "eth%d"); err = register_netdev(netdev); if (err) goto err_register; +#ifdef CONFIG_DCA + if (dca_add_requester(&pdev->dev) == 0) { + adapter->flags |= IXGBE_FLAG_DCA_ENABLED; + /* always use CB2 mode, difference is masked + * in the CB driver */ + IXGBE_WRITE_REG(hw, IXGBE_DCA_CTRL, 2); + ixgbe_setup_dca(adapter); + } +#endif dev_info(&pdev->dev, "Intel(R) 10 Gigabit Network Connection\n"); cards_found++; @@ -2798,6 +3622,7 @@ err_register: ixgbe_release_hw_control(adapter); err_hw_init: err_sw_init: + ixgbe_reset_interrupt_capability(adapter); err_eeprom: iounmap(hw->hw_addr); err_ioremap: @@ -2829,16 +3654,27 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) flush_scheduled_work(); +#ifdef CONFIG_DCA + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { + adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED; + dca_remove_requester(&pdev->dev); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL, 1); + } + +#endif 
unregister_netdev(netdev); - ixgbe_release_hw_control(adapter); + ixgbe_reset_interrupt_capability(adapter); - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); + ixgbe_release_hw_control(adapter); iounmap(adapter->hw.hw_addr); pci_release_regions(pdev); + DPRINTK(PROBE, INFO, "complete\n"); + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); + free_netdev(netdev); pci_disable_device(pdev); @@ -2950,6 +3786,10 @@ static int __init ixgbe_init_module(void) printk(KERN_INFO "%s: %s\n", ixgbe_driver_name, ixgbe_copyright); +#ifdef CONFIG_DCA + dca_register_notify(&dca_notifier); + +#endif ret = pci_register_driver(&ixgbe_driver); return ret; } @@ -2963,8 +3803,25 @@ module_init(ixgbe_init_module); **/ static void __exit ixgbe_exit_module(void) { +#ifdef CONFIG_DCA + dca_unregister_notify(&dca_notifier); +#endif pci_unregister_driver(&ixgbe_driver); } + +#ifdef CONFIG_DCA +static int ixgbe_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int ret_val; + + ret_val = driver_for_each_device(&ixgbe_driver.driver, NULL, &event, + __ixgbe_notify_dca); + + return ret_val ? NOTIFY_BAD : NOTIFY_DONE; +} +#endif /* CONFIG_DCA */ + module_exit(ixgbe_exit_module); /* ixgbe_main.c */ |