From 0ff6ce7b36199f67f709c750e9a2a66659a4babe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Aug 2009 12:25:37 +0200 Subject: cfg80211: fix deadlock When removing an interface with nl80211, cfg80211 will deadlock in the netdev notifier because we're already holding rdev->mtx and try to acquire it again to verify the scan has been done. This bug was introduced by my patch "cfg80211: check for and abort dangling scan requests". To fix this, move the dangling scan request check into wiphy_unregister(). This will not be able to catch all cases right away, but if the scan problem happens with a manual ifdown or so it will be possible to remedy it by removing the module/device. Additionally, add comments about the deadlock scenario. Reported-by: Christian Lamparter Signed-off-by: Johannes Berg Tested-by: Christian Lamparter Tested-by: Kalle Valo Signed-off-by: John W. Linville --- net/wireless/core.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'net/wireless/core.c') diff --git a/net/wireless/core.c b/net/wireless/core.c index bc99e4ec746..69a185ba9ff 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -586,9 +586,14 @@ void wiphy_unregister(struct wiphy *wiphy) * get to lock contention here if userspace issues a command * that identified the hardware by wiphy index. 
*/ - mutex_lock(&rdev->mtx); - /* unlock again before freeing */ - mutex_unlock(&rdev->mtx); + cfg80211_lock_rdev(rdev); + + if (WARN_ON(rdev->scan_req)) { + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev); + } + + cfg80211_unlock_rdev(rdev); cfg80211_debugfs_rdev_del(rdev); @@ -605,7 +610,6 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); - kfree(rdev->scan_req); flush_work(&rdev->event_work); } EXPORT_SYMBOL(wiphy_unregister); @@ -653,6 +657,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, switch (state) { case NETDEV_REGISTER: + /* + * NB: cannot take rdev->mtx here because this may be + * called within code protected by it when interfaces + * are added with nl80211. + */ mutex_init(&wdev->mtx); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); @@ -730,13 +739,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, #endif break; case NETDEV_UNREGISTER: - cfg80211_lock_rdev(rdev); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->dev == dev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev); - } - + /* + * NB: cannot take rdev->mtx here because this may be + * called within code protected by it when interfaces + * are removed with nl80211. + */ mutex_lock(&rdev->devlist_mtx); /* * It is possible to get NETDEV_UNREGISTER @@ -755,7 +762,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, #endif } mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) -- cgit v1.2.3 From ad002395fd230528281083f4be71855ed7e35b04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Aug 2009 19:51:57 +0200 Subject: cfg80211: fix dangling scan request checking My patch "cfg80211: fix deadlock" broke the code it was supposed to fix, the scan request checking. 
But it's not trivial to put it back the way it was, since the original patch had a deadlock. Now do it in a completely new way: queue the check off to a work struct, where we can freely lock. But that has some more complications, like needing to wait for it to be done before the wiphy/rdev can be destroyed, so some code is required to handle that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 76 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 13 deletions(-) (limited to 'net/wireless/core.c') diff --git a/net/wireless/core.c b/net/wireless/core.c index 69a185ba9ff..c150071b6f2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -430,6 +430,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); + init_waitqueue_head(&rdev->dev_wait); + /* * Initialize wiphy parameters to IEEE 802.11 MIB default values. * Fragmentation and RTS threshold are disabled by default with the @@ -574,7 +576,23 @@ void wiphy_unregister(struct wiphy *wiphy) /* protect the device list */ mutex_lock(&cfg80211_mutex); + wait_event(rdev->dev_wait, ({ + int __count; + mutex_lock(&rdev->devlist_mtx); + __count = rdev->opencount; + mutex_unlock(&rdev->devlist_mtx); + __count == 0;})); + + mutex_lock(&rdev->devlist_mtx); BUG_ON(!list_empty(&rdev->netdev_list)); + mutex_unlock(&rdev->devlist_mtx); + + /* + * First remove the hardware from everywhere, this makes + * it impossible to find from userspace. + */ + cfg80211_debugfs_rdev_del(rdev); + list_del(&rdev->list); /* * Try to grab rdev->mtx. If a command is still in progress, @@ -582,26 +600,18 @@ void wiphy_unregister(struct wiphy *wiphy) * down the device already. We wait for this command to complete * before unlinking the item from the list. 
* Note: as codified by the BUG_ON above we cannot get here if - * a virtual interface is still associated. Hence, we can only - * get to lock contention here if userspace issues a command - * that identified the hardware by wiphy index. + * a virtual interface is still present. Hence, we can only get + * to lock contention here if userspace issues a command that + * identified the hardware by wiphy index. */ cfg80211_lock_rdev(rdev); - - if (WARN_ON(rdev->scan_req)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev); - } - + /* nothing */ cfg80211_unlock_rdev(rdev); - cfg80211_debugfs_rdev_del(rdev); - /* If this device got a regulatory hint tell core its * free to listen now to a new shiny device regulatory hint */ reg_device_remove(wiphy); - list_del(&rdev->list); cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); debugfs_remove(rdev->wiphy.debugfsdir); @@ -640,6 +650,31 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); +static void wdev_cleanup_work(struct work_struct *work) +{ + struct wireless_dev *wdev; + struct cfg80211_registered_device *rdev; + + wdev = container_of(work, struct wireless_dev, cleanup_work); + rdev = wiphy_to_dev(wdev->wiphy); + + cfg80211_lock_rdev(rdev); + + if (WARN_ON(rdev->scan_req && rdev->scan_req->dev == wdev->netdev)) { + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev); + } + + cfg80211_unlock_rdev(rdev); + + mutex_lock(&rdev->devlist_mtx); + rdev->opencount--; + mutex_unlock(&rdev->devlist_mtx); + wake_up(&rdev->dev_wait); + + dev_put(wdev->netdev); +} + static int cfg80211_netdev_notifier_call(struct notifier_block * nb, unsigned long state, void *ndev) @@ -663,6 +698,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, * are added with nl80211. 
*/ mutex_init(&wdev->mtx); + INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); @@ -717,8 +753,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, default: break; } + dev_hold(dev); + schedule_work(&wdev->cleanup_work); break; case NETDEV_UP: + /* + * If we have a really quick DOWN/UP succession we may + * have this work still pending ... cancel it and see + * if it was pending, in which case we need to account + * for some of the work it would have done. + */ + if (cancel_work_sync(&wdev->cleanup_work)) { + mutex_lock(&rdev->devlist_mtx); + rdev->opencount--; + mutex_unlock(&rdev->devlist_mtx); + dev_put(dev); + } #ifdef CONFIG_WIRELESS_EXT cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); @@ -734,6 +784,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; } wdev_unlock(wdev); + rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); #endif @@ -756,7 +807,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_init(&wdev->list); rdev->devlist_generation++; - mutex_destroy(&wdev->mtx); #ifdef CONFIG_WIRELESS_EXT kfree(wdev->wext.keys); #endif -- cgit v1.2.3 From 16a832e785820aa199641c77b2d6f4a443d2ec46 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Wed, 19 Aug 2009 16:08:22 +0800 Subject: cfg80211: allow cfg80211_connect_result with bssid == NULL In case of connection failure, the bssid info is not a must have. Signed-off-by: Zhu Yi Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/wireless/core.c') diff --git a/net/wireless/core.c b/net/wireless/core.c index c150071b6f2..154e1e294cb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -12,6 +12,7 @@ #include #include #include +#include <linux/etherdevice.h> #include #include #include @@ -309,7 +310,8 @@ static void cfg80211_process_events(struct wireless_dev *wdev) switch (ev->type) { case EVENT_CONNECT_RESULT: __cfg80211_connect_result( - wdev->netdev, ev->cr.bssid, + wdev->netdev, is_zero_ether_addr(ev->cr.bssid) ? + NULL : ev->cr.bssid, ev->cr.req_ie, ev->cr.req_ie_len, ev->cr.resp_ie, ev->cr.resp_ie_len, ev->cr.status, -- cgit v1.2.3