diff options
author | Keith Packard <keithp@keithp.com> | 2007-12-04 12:22:30 -0800 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2007-12-11 20:23:00 -0800 |
commit | 57b9a54eb668477407c8be54c041d7a9f92c1f51 (patch) | |
tree | 45210a219a48374ac1a85e087aae5b1fcd145b5e /shared-core/i915_dma.c | |
parent | cfa21b22b43c7113107b5eb086b5f4d4ec36dc0a (diff) |
Allow relocation to be skipped when buffers don't move.
One of the costs of superioctl has been the need to perform relocations
inside the kernel. The cost of mapping the buffers to the CPU and writing
data is fairly high, especially if those buffers have been mapped and read
by the GPU.
If we assume that buffers don't move around very often, we can have the
client compute the relocations itself using each buffer's previous GPU
address. When a buffer hasn't moved, the kernel can skip computing and
writing the updated data for relocations that target it.
Here's a patch which adds a new field to struct drm_bo_info_req called
'presumed_offset', and a new hint flag, DRM_BO_HINT_PRESUMED_OFFSET, which
the client sets when it has filled in this field.
There are two separate optimizations performed when the presumed_offset is
correct:
1. i915_exec_reloc checks to see if all previous buffer offsets were guessed
correctly. If so, there's no need for it to look at *any* of the
relocations for a buffer. When this happens, it skips the whole
relocation process, simply returning success.
2. i915_apply_reloc checks to see if the target buffer offset was guessed
correctly. If so, it skips mapping the relocatee, computing the
relocation and writing the value. If no relocations are needed, the
relocatee should never be mapped to the CPU, and so the kernel shouldn't
need to wait for any fences to pass.
Diffstat (limited to 'shared-core/i915_dma.c')
-rw-r--r-- | shared-core/i915_dma.c | 62 |
1 files changed, 48 insertions, 14 deletions
diff --git a/shared-core/i915_dma.c b/shared-core/i915_dma.c index 42114beb..090ac80a 100644 --- a/shared-core/i915_dma.c +++ b/shared-core/i915_dma.c @@ -712,15 +712,20 @@ struct i915_relocatee_info { int is_iomem; }; -static void i915_dereference_buffers_locked(struct drm_buffer_object **buffers, +struct drm_i915_validate_buffer { + struct drm_buffer_object *buffer; + int presumed_offset_correct; +}; + +static void i915_dereference_buffers_locked(struct drm_i915_validate_buffer *buffers, unsigned num_buffers) { while (num_buffers--) - drm_bo_usage_deref_locked(&buffers[num_buffers]); + drm_bo_usage_deref_locked(&buffers[num_buffers].buffer); } int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, - struct drm_buffer_object **buffers, + struct drm_i915_validate_buffer *buffers, struct i915_relocatee_info *relocatee, uint32_t *reloc) { @@ -734,6 +739,13 @@ int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, return -EINVAL; } + /* + * Short-circuit relocations that were correctly + * guessed by the client + */ + if (buffers[reloc[2]].presumed_offset_correct) + return 0; + new_cmd_offset = reloc[0]; if (!relocatee->data_page || !drm_bo_same_page(relocatee->offset, new_cmd_offset)) { @@ -751,7 +763,7 @@ int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, relocatee->page_offset = (relocatee->offset & PAGE_MASK); } - val = buffers[reloc[2]]->offset; + val = buffers[reloc[2]].buffer->offset; index = (reloc[0] - relocatee->page_offset) >> 2; /* add in validate */ @@ -765,7 +777,7 @@ int i915_process_relocs(struct drm_file *file_priv, uint32_t buf_handle, uint32_t *reloc_buf_handle, struct i915_relocatee_info *relocatee, - struct drm_buffer_object **buffers, + struct drm_i915_validate_buffer *buffers, uint32_t num_buffers) { struct drm_device *dev = file_priv->head->dev; @@ -851,12 +863,25 @@ out: static int i915_exec_reloc(struct drm_file *file_priv, drm_handle_t buf_handle, drm_handle_t buf_reloc_handle, - struct drm_buffer_object 
**buffers, + struct drm_i915_validate_buffer *buffers, uint32_t buf_count) { struct drm_device *dev = file_priv->head->dev; struct i915_relocatee_info relocatee; int ret = 0; + int b; + + /* + * Short circuit relocations when all previous + * buffers offsets were correctly guessed by + * the client + */ + for (b = 0; b < buf_count; b++) + if (!buffers[b].presumed_offset_correct) + break; + + if (b == buf_count) + return 0; memset(&relocatee, 0, sizeof(relocatee)); @@ -890,7 +915,7 @@ out_err: */ int i915_validate_buffer_list(struct drm_file *file_priv, unsigned int fence_class, uint64_t data, - struct drm_buffer_object **buffers, + struct drm_i915_validate_buffer *buffers, uint32_t *num_buffers) { struct drm_i915_op_arg arg; @@ -910,7 +935,8 @@ int i915_validate_buffer_list(struct drm_file *file_priv, goto out_err; } - buffers[buf_count] = NULL; + buffers[buf_count].buffer = NULL; + buffers[buf_count].presumed_offset_correct = 0; if (copy_from_user(&arg, (void __user *)(unsigned long)data, sizeof(arg))) { ret = -EFAULT; @@ -920,7 +946,7 @@ int i915_validate_buffer_list(struct drm_file *file_priv, if (arg.handled) { data = arg.next; mutex_lock(&dev->struct_mutex); - buffers[buf_count] = drm_lookup_buffer_object(file_priv, req->arg_handle, 1); + buffers[buf_count].buffer = drm_lookup_buffer_object(file_priv, req->arg_handle, 1); mutex_unlock(&dev->struct_mutex); buf_count++; continue; @@ -951,13 +977,21 @@ int i915_validate_buffer_list(struct drm_file *file_priv, req->bo_req.hint, 0, &rep.bo_info, - &buffers[buf_count]); + &buffers[buf_count].buffer); if (rep.ret) { DRM_ERROR("error on handle validate %d\n", rep.ret); goto out_err; } - + /* + * If the user provided a presumed offset hint, check whether + * the buffer is in the same place, if so, relocations relative to + * this buffer need not be performed + */ + if ((req->bo_req.hint & DRM_BO_HINT_PRESUMED_OFFSET) && + buffers[buf_count].buffer->offset == req->bo_req.presumed_offset) { + 
buffers[buf_count].presumed_offset_correct = 1; + } next = arg.next; arg.handled = 1; @@ -991,7 +1025,7 @@ static int i915_execbuffer(struct drm_device *dev, void *data, struct drm_fence_arg *fence_arg = &exec_buf->fence_arg; int num_buffers; int ret; - struct drm_buffer_object **buffers; + struct drm_i915_validate_buffer *buffers; struct drm_fence_object *fence; if (!dev_priv->allow_batchbuffer) { @@ -1026,7 +1060,7 @@ static int i915_execbuffer(struct drm_device *dev, void *data, num_buffers = exec_buf->num_buffers; - buffers = drm_calloc(num_buffers, sizeof(struct drm_buffer_object *), DRM_MEM_DRIVER); + buffers = drm_calloc(num_buffers, sizeof(struct drm_i915_validate_buffer), DRM_MEM_DRIVER); if (!buffers) { drm_bo_read_unlock(&dev->bm.bm_lock); mutex_unlock(&dev_priv->cmdbuf_mutex); @@ -1044,7 +1078,7 @@ static int i915_execbuffer(struct drm_device *dev, void *data, drm_agp_chipset_flush(dev); /* submit buffer */ - batch->start = buffers[num_buffers-1]->offset; + batch->start = buffers[num_buffers-1].buffer->offset; DRM_DEBUG("i915 exec batchbuffer, start %x used %d cliprects %d\n", batch->start, batch->used, batch->num_cliprects); |