From 07f2211e4fbce6990722d78c4f04225da9c0e9cf Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Mon, 5 Jan 2009 17:14:31 -0700
Subject: dmaengine: remove dependency on async_tx

async_tx.ko is a consumer of dma channels.  A circular dependency arises
if modules in drivers/dma rely on common code in async_tx.ko.  It
prevents either module from being unloaded.

Move dma_wait_for_async_tx and async_tx_run_dependencies to dmaengine.o
where they should have been from the beginning.

Reviewed-by: Andrew Morton
Signed-off-by: Dan Williams
---
 crypto/async_tx/async_tx.c | 75 ----------------------------------------------
 1 file changed, 75 deletions(-)

(limited to 'crypto/async_tx/async_tx.c')

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index dcbf1be149f..8cfac182165 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -72,81 +72,6 @@ void async_tx_issue_pending_all(void)
 }
 EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
 
-/* dma_wait_for_async_tx - spin wait for a transcation to complete
- * @tx: transaction to wait on
- */
-enum dma_status
-dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
-{
-        enum dma_status status;
-        struct dma_async_tx_descriptor *iter;
-        struct dma_async_tx_descriptor *parent;
-
-        if (!tx)
-                return DMA_SUCCESS;
-
-        /* poll through the dependency chain, return when tx is complete */
-        do {
-                iter = tx;
-
-                /* find the root of the unsubmitted dependency chain */
-                do {
-                        parent = iter->parent;
-                        if (!parent)
-                                break;
-                        else
-                                iter = parent;
-                } while (parent);
-
-                /* there is a small window for ->parent == NULL and
-                 * ->cookie == -EBUSY
-                 */
-                while (iter->cookie == -EBUSY)
-                        cpu_relax();
-
-                status = dma_sync_wait(iter->chan, iter->cookie);
-        } while (status == DMA_IN_PROGRESS || (iter != tx));
-
-        return status;
-}
-EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
-
-/* async_tx_run_dependencies - helper routine for dma drivers to process
- *      (start) dependent operations on their target channel
- * @tx: transaction with dependencies
- */
-void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
-{
-        struct dma_async_tx_descriptor *dep = tx->next;
-        struct dma_async_tx_descriptor *dep_next;
-        struct dma_chan *chan;
-
-        if (!dep)
-                return;
-
-        chan = dep->chan;
-
-        /* keep submitting up until a channel switch is detected
-         * in that case we will be called again as a result of
-         * processing the interrupt from async_tx_channel_switch
-         */
-        for (; dep; dep = dep_next) {
-                spin_lock_bh(&dep->lock);
-                dep->parent = NULL;
-                dep_next = dep->next;
-                if (dep_next && dep_next->chan == chan)
-                        dep->next = NULL; /* ->next will be submitted */
-                else
-                        dep_next = NULL; /* submit current dep and terminate */
-                spin_unlock_bh(&dep->lock);
-
-                dep->tx_submit(dep);
-        }
-
-        chan->device->device_issue_pending(chan);
-}
-EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
-
 static void
 free_dma_chan_ref(struct rcu_head *rcu)
 {
--
cgit v1.2.3
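For orientation between patches: once the helper lives in dmaengine, a dma driver's descriptor-cleanup path becomes its expected caller. A minimal sketch of such a caller follows, assuming a hypothetical driver descriptor (example_desc) that embeds a dma_async_tx_descriptor; only async_tx_run_dependencies() and the descriptor callback fields come from the code above.

/* Illustrative sketch, not part of the patch: how a driver cleanup
 * path would start dependent operations once a descriptor completes.
 */
static void example_cleanup_descriptor(struct example_desc *desc)
{
        struct dma_async_tx_descriptor *tx = &desc->txd; /* hypothetical embedding */

        /* let the submitting client know this transaction finished */
        if (tx->callback)
                tx->callback(tx->callback_param);

        /* kick any transactions chained behind this one; after this
         * patch the helper is provided by dmaengine itself, so
         * drivers/dma no longer needs async_tx.ko loaded
         */
        async_tx_run_dependencies(tx);
}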
From 6f49a57aa5a0c6d4e4e27c85f7af6c83325a12d1 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Tue, 6 Jan 2009 11:38:14 -0700
Subject: dmaengine: up-level reference counting to the module level

Simply, if a client wants any dmaengine channel then prevent all dmaengine
modules from being removed.  Once the clients are done re-enable module
removal.

Why? Beyond reducing complication:

1/ Tracking reference counts per-transaction in an efficient manner, as is
   currently done, requires a complicated scheme to avoid cache-line
   bouncing effects.

2/ Per-transaction ref-counting gives the false impression that a
   dma-driver can be gracefully removed ahead of its user (net, md, or
   dma-slave)

3/ None of the in-tree dma-drivers talk to hot pluggable hardware, but if
   such an engine were built one day we still would not need to notify
   clients of remove events.  The driver can simply return NULL to a
   ->prep() request, something that is much easier for a client to handle.

Reviewed-by: Andrew Morton
Acked-by: Maciej Sosnowski
Signed-off-by: Dan Williams
---
 crypto/async_tx/async_tx.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'crypto/async_tx/async_tx.c')

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 8cfac182165..43fe4cbe71e 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -198,8 +198,6 @@ dma_channel_add_remove(struct dma_client *client,
                /* add the channel to the generic management list */
                master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
                if (master_ref) {
-                       /* keep a reference until async_tx is unloaded */
-                       dma_chan_get(chan);
                        init_dma_chan_ref(master_ref, chan);
                        spin_lock_irqsave(&async_tx_lock, flags);
                        list_add_tail_rcu(&master_ref->node,
                                          &async_tx_master_list);
@@ -221,8 +219,6 @@ dma_channel_add_remove(struct dma_client *client,
                spin_lock_irqsave(&async_tx_lock, flags);
                list_for_each_entry(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
-                               /* permit backing devices to go away */
-                               dma_chan_put(ref->chan);
                                list_del_rcu(&ref->node);
                                call_rcu(&ref->rcu, free_dma_chan_ref);
                                found = 1;
--
cgit v1.2.3
From bec085134e446577a983f17f57d642a88d1af53b Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Tue, 6 Jan 2009 11:38:14 -0700
Subject: dmaengine: centralize channel allocation, introduce dma_find_channel

Allowing multiple clients to each define their own channel allocation
scheme quickly leads to a pathological situation.  For memory-to-memory
offload all clients can share a central allocator.

This simply moves the existing async_tx allocator to dmaengine with
minimal fixups:
* async_tx.c:get_chan_ref_by_cap --> dmaengine.c:nth_chan
* async_tx.c:async_tx_rebalance --> dmaengine.c:dma_channel_rebalance
* split out common code from async_tx.c:__async_tx_find_channel -->
  dma_find_channel

Reviewed-by: Andrew Morton
Signed-off-by: Dan Williams
---
 crypto/async_tx/async_tx.c | 146 ++-------------------------------------------
 1 file changed, 4 insertions(+), 142 deletions(-)

(limited to 'crypto/async_tx/async_tx.c')

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 43fe4cbe71e..b88bb1f608f 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -37,26 +37,11 @@ static struct dma_client async_tx_dma = {
        /* .cap_mask == 0 defaults to all channels */
 };
 
-/**
- * dma_cap_mask_all - enable iteration over all operation types
- */
-static dma_cap_mask_t dma_cap_mask_all;
-
-/**
- * chan_ref_percpu - tracks channel allocations per core/opertion
- */
-struct chan_ref_percpu {
-       struct dma_chan_ref *ref;
-};
-
-static int channel_table_initialized;
-static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
-
 /**
  * async_tx_lock - protect modification of async_tx_master_list and serialize
  *     rebalance operations
  */
-static spinlock_t async_tx_lock;
+static DEFINE_SPINLOCK(async_tx_lock);
 
 static LIST_HEAD(async_tx_master_list);
 
@@ -89,85 +74,6 @@ init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
        atomic_set(&ref->count, 0);
 }
 
-/**
- * get_chan_ref_by_cap - returns the nth channel of the given capability
- *     defaults to returning the channel with the desired capability and the
- *     lowest reference count if the index can not be satisfied
- * @cap: capability to match
- * @index: nth channel desired, passing -1 has the effect of forcing the
- *  default return value
- */
-static struct dma_chan_ref *
-get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
-{
-       struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(ref, &async_tx_master_list, node)
-               if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
-                       if (!min_ref)
-                               min_ref = ref;
-                       else if (atomic_read(&ref->count) <
-                               atomic_read(&min_ref->count))
-                               min_ref = ref;
-
-                       if (index-- == 0) {
-                               ret_ref = ref;
-                               break;
-                       }
-               }
-       rcu_read_unlock();
-
-       if (!ret_ref)
-               ret_ref = min_ref;
-
-       if (ret_ref)
-               atomic_inc(&ret_ref->count);
-
-       return ret_ref;
-}
-
-/**
- * async_tx_rebalance - redistribute the available channels, optimize
- * for cpu isolation in the SMP case, and opertaion isolation in the
- * uniprocessor case
- */
-static void async_tx_rebalance(void)
-{
-       int cpu, cap, cpu_idx = 0;
-       unsigned long flags;
-
-       if (!channel_table_initialized)
-               return;
-
-       spin_lock_irqsave(&async_tx_lock, flags);
-
-       /* undo the last distribution */
-       for_each_dma_cap_mask(cap, dma_cap_mask_all)
-               for_each_possible_cpu(cpu) {
-                       struct dma_chan_ref *ref =
-                               per_cpu_ptr(channel_table[cap], cpu)->ref;
-                       if (ref) {
-                               atomic_set(&ref->count, 0);
-                               per_cpu_ptr(channel_table[cap], cpu)->ref =
-                                                                       NULL;
-                       }
-               }
-
-       for_each_dma_cap_mask(cap, dma_cap_mask_all)
-               for_each_online_cpu(cpu) {
-                       struct dma_chan_ref *new;
-                       if (NR_CPUS > 1)
-                               new = get_chan_ref_by_cap(cap, cpu_idx++);
-                       else
-                               new = get_chan_ref_by_cap(cap, -1);
-
-                       per_cpu_ptr(channel_table[cap], cpu)->ref = new;
-               }
-
-       spin_unlock_irqrestore(&async_tx_lock, flags);
-}
-
 static enum dma_state_client
 dma_channel_add_remove(struct dma_client *client,
                        struct dma_chan *chan, enum dma_state state)
@@ -211,8 +117,6 @@ dma_channel_add_remove(struct dma_client *client,
                                " (-ENOMEM)\n");
                        return 0;
                }
-
-               async_tx_rebalance();
                break;
        case DMA_RESOURCE_REMOVED:
                found = 0;
@@ -233,8 +137,6 @@ dma_channel_add_remove(struct dma_client *client,
                        ack = DMA_ACK;
                else
                        break;
-
-               async_tx_rebalance();
                break;
        case DMA_RESOURCE_SUSPEND:
        case DMA_RESOURCE_RESUME:
@@ -248,51 +150,18 @@ dma_channel_add_remove(struct dma_client *client,
        return ack;
 }
 
-static int __init
-async_tx_init(void)
+static int __init async_tx_init(void)
 {
-       enum dma_transaction_type cap;
-
-       spin_lock_init(&async_tx_lock);
-       bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
-
-       /* an interrupt will never be an explicit operation type.
-        * clearing this bit prevents allocation to a slot in 'channel_table'
-        */
-       clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
-
-       for_each_dma_cap_mask(cap, dma_cap_mask_all) {
-               channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
-               if (!channel_table[cap])
-                       goto err;
-       }
-
-       channel_table_initialized = 1;
        dma_async_client_register(&async_tx_dma);
        dma_async_client_chan_request(&async_tx_dma);
 
        printk(KERN_INFO "async_tx: api initialized (async)\n");
 
        return 0;
-err:
-       printk(KERN_ERR "async_tx: initialization failure\n");
-
-       while (--cap >= 0)
-               free_percpu(channel_table[cap]);
-
-       return 1;
 }
 
 static void __exit async_tx_exit(void)
 {
-       enum dma_transaction_type cap;
-
-       channel_table_initialized = 0;
-
-       for_each_dma_cap_mask(cap, dma_cap_mask_all)
-               if (channel_table[cap])
-                       free_percpu(channel_table[cap]);
-
        dma_async_client_unregister(&async_tx_dma);
 }
 
@@ -308,16 +177,9 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
 {
        /* see if we can keep the chain on one channel */
        if (depend_tx &&
-           dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
+           dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
                return depend_tx->chan;
-       else if (likely(channel_table_initialized)) {
-               struct dma_chan_ref *ref;
-               int cpu = get_cpu();
-               ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
-               put_cpu();
-               return ref ? ref->chan : NULL;
-       } else
-               return NULL;
+       return dma_find_channel(tx_type);
 }
 EXPORT_SYMBOL_GPL(__async_tx_find_channel);
 #else
--
cgit v1.2.3
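The client-visible half of this change is dma_find_channel(). A minimal sketch of how a memory-to-memory offload client would use the shared allocator follows; the wrapper function is hypothetical, only dma_find_channel() and the DMA_MEMCPY capability come from the series itself.

/* Illustrative sketch, not part of the patch: ask the central
 * allocator for a copy-capable channel instead of maintaining a
 * private per-client channel list.
 */
static struct dma_chan *example_get_copy_channel(void)
{
        /* returns this cpu's preferred DMA_MEMCPY channel,
         * or NULL when no such channel is registered
         */
        return dma_find_channel(DMA_MEMCPY);
}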
From 2ba05622b8b143b0c95968ba59bddfbd6d2f2559 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Tue, 6 Jan 2009 11:38:14 -0700
Subject: dmaengine: provide a common 'issue_pending_all' implementation

async_tx and net_dma each have open-coded versions of issue_pending_all,
so provide a common routine in dmaengine.

The implementation needs to walk the global device list, so implement
rcu to allow dma_issue_pending_all to run lockless.  Clients protect
themselves from channel removal events by holding a dmaengine reference.

Reviewed-by: Andrew Morton
Signed-off-by: Dan Williams
---
 crypto/async_tx/async_tx.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'crypto/async_tx/async_tx.c')

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index b88bb1f608f..2cdf7a0867b 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -45,18 +45,6 @@ static DEFINE_SPINLOCK(async_tx_lock);
 
 static LIST_HEAD(async_tx_master_list);
 
-/* async_tx_issue_pending_all - start all transactions on all channels */
-void async_tx_issue_pending_all(void)
-{
-       struct dma_chan_ref *ref;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(ref, &async_tx_master_list, node)
-               ref->chan->device->device_issue_pending(ref->chan);
-       rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
-
 static void
 free_dma_chan_ref(struct rcu_head *rcu)
 {
--
cgit v1.2.3

From 209b84a88fe81341b4d8d465acc4a67cb7c3feb3 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Tue, 6 Jan 2009 11:38:17 -0700
Subject: dmaengine: replace dma_async_client_register with dmaengine_get

Now that clients no longer need to be notified of channel arrival
dma_async_client_register can simply increment the dmaengine_ref_count.

Reviewed-by: Andrew Morton
Signed-off-by: Dan Williams
---
 crypto/async_tx/async_tx.c | 115 +--------------------------------------------
 1 file changed, 2 insertions(+), 113 deletions(-)

(limited to 'crypto/async_tx/async_tx.c')

diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 2cdf7a0867b..f21147f3626 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -28,120 +28,9 @@
 #include 
 
 #ifdef CONFIG_DMA_ENGINE
-static enum dma_state_client
-dma_channel_add_remove(struct dma_client *client,
-       struct dma_chan *chan, enum dma_state state);
-
-static struct dma_client async_tx_dma = {
-       .event_callback = dma_channel_add_remove,
-       /* .cap_mask == 0 defaults to all channels */
-};
-
-/**
- * async_tx_lock - protect modification of async_tx_master_list and serialize
- *     rebalance operations
- */
-static DEFINE_SPINLOCK(async_tx_lock);
-
-static LIST_HEAD(async_tx_master_list);
-
-static void
-free_dma_chan_ref(struct rcu_head *rcu)
-{
-       struct dma_chan_ref *ref;
-       ref = container_of(rcu, struct dma_chan_ref, rcu);
-       kfree(ref);
-}
-
-static void
-init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
-{
-       INIT_LIST_HEAD(&ref->node);
-       INIT_RCU_HEAD(&ref->rcu);
-       ref->chan = chan;
-       atomic_set(&ref->count, 0);
-}
-
-static enum dma_state_client
-dma_channel_add_remove(struct dma_client *client,
-       struct dma_chan *chan, enum dma_state state)
-{
-       unsigned long found, flags;
-       struct dma_chan_ref *master_ref, *ref;
-       enum dma_state_client ack = DMA_DUP; /* default: take no action */
-
-       switch (state) {
-       case DMA_RESOURCE_AVAILABLE:
-               found = 0;
-               rcu_read_lock();
-               list_for_each_entry_rcu(ref, &async_tx_master_list, node)
-                       if (ref->chan == chan) {
-                               found = 1;
-                               break;
-                       }
-               rcu_read_unlock();
-
-               pr_debug("async_tx: dma resource available [%s]\n",
-                        found ? "old" : "new");
-
-               if (!found)
-                       ack = DMA_ACK;
-               else
-                       break;
-
-               /* add the channel to the generic management list */
-               master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
-               if (master_ref) {
-                       init_dma_chan_ref(master_ref, chan);
-                       spin_lock_irqsave(&async_tx_lock, flags);
-                       list_add_tail_rcu(&master_ref->node,
-                                         &async_tx_master_list);
-                       spin_unlock_irqrestore(&async_tx_lock,
-                                              flags);
-               } else {
-                       printk(KERN_WARNING "async_tx: unable to create"
-                               " new master entry in response to"
-                               " a DMA_RESOURCE_ADDED event"
-                               " (-ENOMEM)\n");
-                       return 0;
-               }
-               break;
-       case DMA_RESOURCE_REMOVED:
-               found = 0;
-               spin_lock_irqsave(&async_tx_lock, flags);
-               list_for_each_entry(ref, &async_tx_master_list, node)
-                       if (ref->chan == chan) {
-                               list_del_rcu(&ref->node);
-                               call_rcu(&ref->rcu, free_dma_chan_ref);
-                               found = 1;
-                               break;
-                       }
-               spin_unlock_irqrestore(&async_tx_lock, flags);
-
-               pr_debug("async_tx: dma resource removed [%s]\n",
-                        found ? "ours" : "not ours");
-
-               if (found)
-                       ack = DMA_ACK;
-               else
-                       break;
-               break;
-       case DMA_RESOURCE_SUSPEND:
-       case DMA_RESOURCE_RESUME:
-               printk(KERN_WARNING "async_tx: does not support dma channel"
-                       " suspend/resume\n");
-               break;
-       default:
-               BUG();
-       }
-
-       return ack;
-}
-
 static int __init async_tx_init(void)
 {
-       dma_async_client_register(&async_tx_dma);
-       dma_async_client_chan_request(&async_tx_dma);
+       dmaengine_get();
 
        printk(KERN_INFO "async_tx: api initialized (async)\n");
 
@@ -150,7 +39,7 @@ static int __init async_tx_init(void)
 
 static void __exit async_tx_exit(void)
 {
-       dma_async_client_unregister(&async_tx_dma);
+       dmaengine_put();
 }
 
 /**
--
cgit v1.2.3