From bea3348eef27e6044b6161fd04c3152215f96411 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 3 Oct 2007 16:41:36 -0700 Subject: [NET]: Make NAPI polling independent of struct net_device objects. Several devices have multiple independant RX queues per net device, and some have a single interrupt doorbell for several queues. In either case, it's easier to support layouts like that if the structure representing the poll is independant from the net device itself. The signature of the ->poll() call back goes from: int foo_poll(struct net_device *dev, int *budget) to int foo_poll(struct napi_struct *napi, int budget) The caller is returned the number of RX packets processed (or the number of "NAPI credits" consumed if you want to get abstract). The callee no longer messes around bumping dev->quota, *budget, etc. because that is all handled in the caller upon return. The napi_struct is to be embedded in the device driver private data structures. Furthermore, it is the driver's responsibility to disable all NAPI instances in it's ->stop() device close handler. Since the napi_struct is privatized into the driver's private data structures, only the driver knows how to get at all of the napi_struct instances it may have per-device. With lots of help and suggestions from Rusty Russell, Roland Dreier, Michael Chan, Jeff Garzik, and Jamal Hadi Salim. Bug fixes from Thomas Graf, Roland Dreier, Peter Zijlstra, Joseph Fannin, Scott Wood, Hans J. Koch, and Michael Chan. [ Ported to current tree and all drivers converted. Integrated Stephen's follow-on kerneldoc additions, and restored poll_list handling to the old style to fix mutual exclusion issues. -DaveM ] Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/cxgb3/adapter.h | 22 ++---- drivers/net/cxgb3/cxgb3_main.c | 96 +++++++---------------- drivers/net/cxgb3/sge.c | 170 +++++++++++++++++------------------------ 3 files changed, 102 insertions(+), 186 deletions(-) (limited to 'drivers/net/cxgb3') diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 20e887de254..04426170338 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -49,11 +49,13 @@ typedef irqreturn_t(*intr_handler_t) (int, void *); struct vlan_group; - struct adapter; +struct sge_qset; + struct port_info { struct adapter *adapter; struct vlan_group *vlan_grp; + struct sge_qset *qs; const struct port_type_info *port_type; u8 port_id; u8 rx_csum_offload; @@ -173,10 +175,12 @@ enum { /* per port SGE statistics */ }; struct sge_qset { /* an SGE queue set */ + struct adapter *adap; + struct napi_struct napi; struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; struct sge_txq txq[SGE_TXQ_PER_SET]; - struct net_device *netdev; /* associated net device */ + struct net_device *netdev; unsigned long txq_stopped; /* which Tx queues are stopped */ struct timer_list tx_reclaim_timer; /* reclaims TX buffers */ unsigned long port_stats[SGE_PSTAT_MAX]; @@ -221,12 +225,6 @@ struct adapter { struct delayed_work adap_check_task; struct work_struct ext_intr_handler_task; - /* - * Dummy netdevices are needed when using multiple receive queues with - * NAPI as each netdevice can service only one queue. - */ - struct net_device *dummy_netdev[SGE_QSETS - 1]; - struct dentry *debugfs_root; struct mutex mdio_lock; @@ -253,12 +251,6 @@ static inline struct port_info *adap2pinfo(struct adapter *adap, int idx) return netdev_priv(adap->port[idx]); } -/* - * We use the spare atalk_ptr to map a net device to its SGE queue set. - * This is a macro so it can be used as l-value. - */ -#define dev2qset(netdev) ((netdev)->atalk_ptr) - #define OFFLOAD_DEVMAP_BIT 15 #define tdev2adap(d) container_of(d, struct adapter, tdev) @@ -284,7 +276,7 @@ int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb); void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p); int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, int irq_vec_idx, const struct qset_params *p, - int ntxq, struct net_device *netdev); + int ntxq, struct net_device *dev); int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data); irqreturn_t t3_sge_intr_msix(int irq, void *cookie); diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 5ab319cfe5d..5db7d4e27ec 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -339,49 +339,17 @@ static void setup_rss(struct adapter *adap) V_RRCPLCPUSIZE(6), cpus, rspq_map); } -/* - * If we have multiple receive queues per port serviced by NAPI we need one - * netdevice per queue as NAPI operates on netdevices. We already have one - * netdevice, namely the one associated with the interface, so we use dummy - * ones for any additional queues. Note that these netdevices exist purely - * so that NAPI has something to work with, they do not represent network - * ports and are not registered. - */ -static int init_dummy_netdevs(struct adapter *adap) +static void init_napi(struct adapter *adap) { - int i, j, dummy_idx = 0; - struct net_device *nd; - - for_each_port(adap, i) { - struct net_device *dev = adap->port[i]; - const struct port_info *pi = netdev_priv(dev); - - for (j = 0; j < pi->nqsets - 1; j++) { - if (!adap->dummy_netdev[dummy_idx]) { - struct port_info *p; - - nd = alloc_netdev(sizeof(*p), "", ether_setup); - if (!nd) - goto free_all; + int i; - p = netdev_priv(nd); - p->adapter = adap; - nd->weight = 64; - set_bit(__LINK_STATE_START, &nd->state); - adap->dummy_netdev[dummy_idx] = nd; - } - strcpy(adap->dummy_netdev[dummy_idx]->name, dev->name); - dummy_idx++; - } - } - return 0; + for (i = 0; i < SGE_QSETS; i++) { + struct sge_qset *qs = &adap->sge.qs[i]; -free_all: - while (--dummy_idx >= 0) { - free_netdev(adap->dummy_netdev[dummy_idx]); - adap->dummy_netdev[dummy_idx] = NULL; + if (qs->adap) + netif_napi_add(qs->netdev, &qs->napi, qs->napi.poll, + 64); } - return -ENOMEM; } /* @@ -392,20 +360,18 @@ free_all: static void quiesce_rx(struct adapter *adap) { int i; - struct net_device *dev; - for_each_port(adap, i) { - dev = adap->port[i]; - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) - msleep(1); - } + for (i = 0; i < SGE_QSETS; i++) + if (adap->sge.qs[i].adap) + napi_disable(&adap->sge.qs[i].napi); +} - for (i = 0; i < ARRAY_SIZE(adap->dummy_netdev); i++) { - dev = adap->dummy_netdev[i]; - if (dev) - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) - msleep(1); - } +static void enable_all_napi(struct adapter *adap) +{ + int i; + for (i = 0; i < SGE_QSETS; i++) + if (adap->sge.qs[i].adap) + napi_enable(&adap->sge.qs[i].napi); } /** @@ -418,7 +384,7 @@ static void quiesce_rx(struct adapter *adap) */ static int setup_sge_qsets(struct adapter *adap) { - int i, j, err, irq_idx = 0, qset_idx = 0, dummy_dev_idx = 0; + int i, j, err, irq_idx = 0, qset_idx = 0; unsigned int ntxq = SGE_TXQ_PER_SET; if (adap->params.rev > 0 && !(adap->flags & USING_MSI)) @@ -426,15 +392,14 @@ static int setup_sge_qsets(struct adapter *adap) for_each_port(adap, i) { struct net_device *dev = adap->port[i]; - const struct port_info *pi = netdev_priv(dev); + struct port_info *pi = netdev_priv(dev); + pi->qs = &adap->sge.qs[pi->first_qset]; for (j = 0; j < pi->nqsets; ++j, ++qset_idx) { err = t3_sge_alloc_qset(adap, qset_idx, 1, (adap->flags & USING_MSIX) ? qset_idx + 1 : irq_idx, - &adap->params.sge.qset[qset_idx], ntxq, - j == 0 ? dev : - adap-> dummy_netdev[dummy_dev_idx++]); + &adap->params.sge.qset[qset_idx], ntxq, dev); if (err) { t3_free_sge_resources(adap); return err; @@ -845,21 +810,18 @@ static int cxgb_up(struct adapter *adap) goto out; } - err = init_dummy_netdevs(adap); - if (err) - goto out; - err = t3_init_hw(adap, 0); if (err) goto out; t3_write_reg(adap, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); - + err = setup_sge_qsets(adap); if (err) goto out; setup_rss(adap); + init_napi(adap); adap->flags |= FULL_INIT_DONE; } @@ -886,6 +848,7 @@ static int cxgb_up(struct adapter *adap) adap->name, adap))) goto irq_err; + enable_all_napi(adap); t3_sge_start(adap); t3_intr_enable(adap); @@ -1012,8 +975,10 @@ static int cxgb_open(struct net_device *dev) int other_ports = adapter->open_device_map & PORT_MASK; int err; - if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) + if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) { + quiesce_rx(adapter); return err; + } set_bit(pi->port_id, &adapter->open_device_map); if (is_offload(adapter) && !ofld_disable) { @@ -2524,7 +2489,6 @@ static int __devinit init_one(struct pci_dev *pdev, #ifdef CONFIG_NET_POLL_CONTROLLER netdev->poll_controller = cxgb_netpoll; #endif - netdev->weight = 64; SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); } @@ -2625,12 +2589,6 @@ static void __devexit remove_one(struct pci_dev *pdev) t3_free_sge_resources(adapter); cxgb_disable_msi(adapter); - for (i = 0; i < ARRAY_SIZE(adapter->dummy_netdev); i++) - if (adapter->dummy_netdev[i]) { - free_netdev(adapter->dummy_netdev[i]); - adapter->dummy_netdev[i] = NULL; - } - for_each_port(adapter, i) if (adapter->port[i]) free_netdev(adapter->port[i]); diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 58a5f60521e..069c1aca8a6 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -591,9 +591,6 @@ void t3_free_qset(struct adapter *adapter, struct sge_qset *q) q->rspq.desc, q->rspq.phys_addr); } - if (q->netdev) - q->netdev->atalk_ptr = NULL; - memset(q, 0, sizeof(*q)); } @@ -1074,7 +1071,7 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int ndesc, pidx, credits, gen, compl; const struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; - struct sge_qset *qs = dev2qset(dev); + struct sge_qset *qs = pi->qs; struct sge_txq *q = &qs->txq[TXQ_ETH]; /* @@ -1326,13 +1323,12 @@ static void restart_ctrlq(unsigned long data) struct sk_buff *skb; struct sge_qset *qs = (struct sge_qset *)data; struct sge_txq *q = &qs->txq[TXQ_CTRL]; - const struct port_info *pi = netdev_priv(qs->netdev); - struct adapter *adap = pi->adapter; spin_lock(&q->lock); again:reclaim_completed_tx_imm(q); - while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) { + while (q->in_use < q->size && + (skb = __skb_dequeue(&q->sendq)) != NULL) { write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); @@ -1354,7 +1350,7 @@ static void restart_ctrlq(unsigned long data) } spin_unlock(&q->lock); - t3_write_reg(adap, A_SG_KDOORBELL, + t3_write_reg(qs->adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } @@ -1638,8 +1634,7 @@ static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb) else { struct sge_qset *qs = rspq_to_qset(q); - if (__netif_rx_schedule_prep(qs->netdev)) - __netif_rx_schedule(qs->netdev); + napi_schedule(&qs->napi); q->rx_head = skb; } q->rx_tail = skb; @@ -1675,34 +1670,30 @@ static inline void deliver_partial_bundle(struct t3cdev *tdev, * receive handler. Batches need to be of modest size as we do prefetches * on the packets in each. */ -static int ofld_poll(struct net_device *dev, int *budget) +static int ofld_poll(struct napi_struct *napi, int budget) { - const struct port_info *pi = netdev_priv(dev); - struct adapter *adapter = pi->adapter; - struct sge_qset *qs = dev2qset(dev); + struct sge_qset *qs = container_of(napi, struct sge_qset, napi); struct sge_rspq *q = &qs->rspq; - int work_done, limit = min(*budget, dev->quota), avail = limit; + struct adapter *adapter = qs->adap; + int work_done = 0; - while (avail) { + while (work_done < budget) { struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE]; int ngathered; spin_lock_irq(&q->lock); head = q->rx_head; if (!head) { - work_done = limit - avail; - *budget -= work_done; - dev->quota -= work_done; - __netif_rx_complete(dev); + napi_complete(napi); spin_unlock_irq(&q->lock); - return 0; + return work_done; } tail = q->rx_tail; q->rx_head = q->rx_tail = NULL; spin_unlock_irq(&q->lock); - for (ngathered = 0; avail && head; avail--) { + for (ngathered = 0; work_done < budget && head; work_done++) { prefetch(head->data); skbs[ngathered] = head; head = head->next; @@ -1724,10 +1715,8 @@ static int ofld_poll(struct net_device *dev, int *budget) } deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered); } - work_done = limit - avail; - *budget -= work_done; - dev->quota -= work_done; - return 1; + + return work_done; } /** @@ -2071,50 +2060,47 @@ static inline int is_pure_response(const struct rsp_desc *r) /** * napi_rx_handler - the NAPI handler for Rx processing - * @dev: the net device + * @napi: the napi instance * @budget: how many packets we can process in this round * * Handler for new data events when using NAPI. */ -static int napi_rx_handler(struct net_device *dev, int *budget) +static int napi_rx_handler(struct napi_struct *napi, int budget) { - const struct port_info *pi = netdev_priv(dev); - struct adapter *adap = pi->adapter; - struct sge_qset *qs = dev2qset(dev); - int effective_budget = min(*budget, dev->quota); - - int work_done = process_responses(adap, qs, effective_budget); - *budget -= work_done; - dev->quota -= work_done; + struct sge_qset *qs = container_of(napi, struct sge_qset, napi); + struct adapter *adap = qs->adap; + int work_done = process_responses(adap, qs, budget); - if (work_done >= effective_budget) - return 1; - - netif_rx_complete(dev); + if (likely(work_done < budget)) { + napi_complete(napi); - /* - * Because we don't atomically flush the following write it is - * possible that in very rare cases it can reach the device in a way - * that races with a new response being written plus an error interrupt - * causing the NAPI interrupt handler below to return unhandled status - * to the OS. To protect against this would require flushing the write - * and doing both the write and the flush with interrupts off. Way too - * expensive and unjustifiable given the rarity of the race. - * - * The race cannot happen at all with MSI-X. - */ - t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | - V_NEWTIMER(qs->rspq.next_holdoff) | - V_NEWINDEX(qs->rspq.cidx)); - return 0; + /* + * Because we don't atomically flush the following + * write it is possible that in very rare cases it can + * reach the device in a way that races with a new + * response being written plus an error interrupt + * causing the NAPI interrupt handler below to return + * unhandled status to the OS. To protect against + * this would require flushing the write and doing + * both the write and the flush with interrupts off. + * Way too expensive and unjustifiable given the + * rarity of the race. + * + * The race cannot happen at all with MSI-X. + */ + t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | + V_NEWTIMER(qs->rspq.next_holdoff) | + V_NEWINDEX(qs->rspq.cidx)); + } + return work_done; } /* * Returns true if the device is already scheduled for polling. */ -static inline int napi_is_scheduled(struct net_device *dev) +static inline int napi_is_scheduled(struct napi_struct *napi) { - return test_bit(__LINK_STATE_RX_SCHED, &dev->state); + return test_bit(NAPI_STATE_SCHED, &napi->state); } /** @@ -2197,8 +2183,7 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q) V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx)); return 0; } - if (likely(__netif_rx_schedule_prep(qs->netdev))) - __netif_rx_schedule(qs->netdev); + napi_schedule(&qs->napi); return 1; } @@ -2209,8 +2194,7 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q) irqreturn_t t3_sge_intr_msix(int irq, void *cookie) { struct sge_qset *qs = cookie; - const struct port_info *pi = netdev_priv(qs->netdev); - struct adapter *adap = pi->adapter; + struct adapter *adap = qs->adap; struct sge_rspq *q = &qs->rspq; spin_lock(&q->lock); @@ -2229,13 +2213,11 @@ irqreturn_t t3_sge_intr_msix(int irq, void *cookie) irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie) { struct sge_qset *qs = cookie; - const struct port_info *pi = netdev_priv(qs->netdev); - struct adapter *adap = pi->adapter; struct sge_rspq *q = &qs->rspq; spin_lock(&q->lock); - if (handle_responses(adap, q) < 0) + if (handle_responses(qs->adap, q) < 0) q->unhandled_irqs++; spin_unlock(&q->lock); return IRQ_HANDLED; @@ -2278,11 +2260,13 @@ static irqreturn_t t3_intr_msi(int irq, void *cookie) return IRQ_HANDLED; } -static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q) +static int rspq_check_napi(struct sge_qset *qs) { - if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) { - if (likely(__netif_rx_schedule_prep(dev))) - __netif_rx_schedule(dev); + struct sge_rspq *q = &qs->rspq; + + if (!napi_is_scheduled(&qs->napi) && + is_new_response(&q->desc[q->cidx], q)) { + napi_schedule(&qs->napi); return 1; } return 0; @@ -2303,10 +2287,9 @@ irqreturn_t t3_intr_msi_napi(int irq, void *cookie) spin_lock(&q->lock); - new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q); + new_packets = rspq_check_napi(&adap->sge.qs[0]); if (adap->params.nports == 2) - new_packets += rspq_check_napi(adap->sge.qs[1].netdev, - &adap->sge.qs[1].rspq); + new_packets += rspq_check_napi(&adap->sge.qs[1]); if (!new_packets && t3_slow_intr_handler(adap) == 0) q->unhandled_irqs++; @@ -2409,9 +2392,9 @@ static irqreturn_t t3b_intr(int irq, void *cookie) static irqreturn_t t3b_intr_napi(int irq, void *cookie) { u32 map; - struct net_device *dev; struct adapter *adap = cookie; - struct sge_rspq *q0 = &adap->sge.qs[0].rspq; + struct sge_qset *qs0 = &adap->sge.qs[0]; + struct sge_rspq *q0 = &qs0->rspq; t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR); @@ -2424,18 +2407,11 @@ static irqreturn_t t3b_intr_napi(int irq, void *cookie) if (unlikely(map & F_ERRINTR)) t3_slow_intr_handler(adap); - if (likely(map & 1)) { - dev = adap->sge.qs[0].netdev; - - if (likely(__netif_rx_schedule_prep(dev))) - __netif_rx_schedule(dev); - } - if (map & 2) { - dev = adap->sge.qs[1].netdev; + if (likely(map & 1)) + napi_schedule(&qs0->napi); - if (likely(__netif_rx_schedule_prep(dev))) - __netif_rx_schedule(dev); - } + if (map & 2) + napi_schedule(&adap->sge.qs[1].napi); spin_unlock(&q0->lock); return IRQ_HANDLED; @@ -2514,8 +2490,7 @@ static void sge_timer_cb(unsigned long data) { spinlock_t *lock; struct sge_qset *qs = (struct sge_qset *)data; - const struct port_info *pi = netdev_priv(qs->netdev); - struct adapter *adap = pi->adapter; + struct adapter *adap = qs->adap; if (spin_trylock(&qs->txq[TXQ_ETH].lock)) { reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]); @@ -2526,9 +2501,9 @@ static void sge_timer_cb(unsigned long data) spin_unlock(&qs->txq[TXQ_OFLD].lock); } lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock : - &adap->sge.qs[0].rspq.lock; + &adap->sge.qs[0].rspq.lock; if (spin_trylock_irq(lock)) { - if (!napi_is_scheduled(qs->netdev)) { + if (!napi_is_scheduled(&qs->napi)) { u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS); if (qs->fl[0].credits < qs->fl[0].size) @@ -2562,12 +2537,9 @@ static void sge_timer_cb(unsigned long data) */ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) { - if (!qs->netdev) - return; - qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */ qs->rspq.polling = p->polling; - qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll; + qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll; } /** @@ -2587,7 +2559,7 @@ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) */ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, int irq_vec_idx, const struct qset_params *p, - int ntxq, struct net_device *netdev) + int ntxq, struct net_device *dev) { int i, ret = -ENOMEM; struct sge_qset *q = &adapter->sge.qs[id]; @@ -2708,16 +2680,10 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, } spin_unlock(&adapter->sge.reg_lock); - q->netdev = netdev; - t3_update_qset_coalesce(q, p); - /* - * We use atalk_ptr as a backpointer to a qset. In case a device is - * associated with multiple queue sets only the first one sets - * atalk_ptr. - */ - if (netdev->atalk_ptr == NULL) - netdev->atalk_ptr = q; + q->adap = adapter; + q->netdev = dev; + t3_update_qset_coalesce(q, p); refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL); refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL); -- cgit v1.2.3