From 89bba44e969f15bf20da6d700c493237b095a588 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 May 2008 11:52:52 -0700 Subject: Add intel_bufmgr_gem for new graphics execution manager. --- src/mesa/drivers/dri/common/dri_bufmgr.c | 4 +- src/mesa/drivers/dri/common/dri_bufmgr.h | 17 +- src/mesa/drivers/dri/common/dri_bufmgr_fake.c | 4 +- src/mesa/drivers/dri/i965/Makefile | 2 +- src/mesa/drivers/dri/i965/intel_bufmgr_gem.c | 1 + src/mesa/drivers/dri/intel/intel_batchbuffer.c | 9 +- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 818 +++++++++++++++++++++++++ src/mesa/drivers/dri/intel/intel_bufmgr_gem.h | 19 + src/mesa/drivers/dri/intel/intel_ioctl.c | 32 +- src/mesa/drivers/dri/intel/intel_ioctl.h | 3 +- 10 files changed, 869 insertions(+), 40 deletions(-) create mode 120000 src/mesa/drivers/dri/i965/intel_bufmgr_gem.c create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_gem.c create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_gem.h (limited to 'src') diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 4df006fb9f..70ae091499 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -148,9 +148,9 @@ int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, return reloc_buf->bufmgr->emit_reloc(reloc_buf, flags, delta, offset, target_buf); } -void *dri_process_relocs(dri_bo *batch_buf, GLuint *count) +void *dri_process_relocs(dri_bo *batch_buf) { - return batch_buf->bufmgr->process_relocs(batch_buf, count); + return batch_buf->bufmgr->process_relocs(batch_buf); } void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence) diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index 4593eaf9f7..cbfeb9136d 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -41,7 +41,12 @@ typedef struct _dri_bo dri_bo; typedef struct _dri_fence dri_fence; struct _dri_bo { - /** Size in bytes of the buffer object. */ + /** + * Size in bytes of the buffer object. + * + * The size may be larger than the size originally requested for the + * allocation, such as being aligned to page size. + */ unsigned long size; /** * Card virtual address (offset from the beginning of the aperture) for the @@ -169,10 +174,10 @@ struct _dri_bufmgr { * into them the appopriate order. * * \param batch_buf buffer at the root of the tree of relocations - * \param count returns the number of buffers validated. - * \return relocation record for use in command submission. - * */ - void *(*process_relocs)(dri_bo *batch_buf, GLuint *count); + * \return argument to be completed and passed to the execbuffers ioctl + * (if any). + */ + void *(*process_relocs)(dri_bo *batch_buf); void (*post_submit)(dri_bo *batch_buf, dri_fence **fence); @@ -214,7 +219,7 @@ void dri_bufmgr_destroy(dri_bufmgr *bufmgr); int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target_buf); -void *dri_process_relocs(dri_bo *batch_buf, uint32_t *count); +void *dri_process_relocs(dri_bo *batch_buf); void dri_post_process_relocs(dri_bo *batch_buf); void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence); int dri_bufmgr_check_aperture_space(dri_bo *bo); diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c index 9bf3f3437c..9dd06b07eb 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -1098,7 +1098,7 @@ dri_fake_reloc_and_validate_buffer(dri_bo *bo) } static void * -dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) +dri_fake_process_relocs(dri_bo *batch_buf) { dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; @@ -1126,8 +1126,6 @@ dri_fake_process_relocs(dri_bo *batch_buf, GLuint *count_p) assert(ret == 0); - *count_p = 0; /* junk */ - bufmgr_fake->current_total_size = 0; return NULL; } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index d46b3428f5..ca9b7da40f 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -9,7 +9,7 @@ DRIVER_SOURCES = \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ - intel_bufmgr_ttm.c \ + intel_bufmgr_gem.c \ intel_context.c \ intel_decode.c \ intel_depthstencil.c \ diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c new file mode 120000 index 0000000000..dee0daf9c0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_gem.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index a594fb6cc4..c5b0f531d4 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -131,11 +131,8 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used, GLboolean allow_unlock) { struct intel_context *intel = batch->intel; - void *start; - GLuint count; dri_bo_unmap(batch->buf); - start = dri_process_relocs(batch->buf, &count); batch->map = NULL; batch->ptr = NULL; @@ -148,12 +145,16 @@ do_flush_locked(struct intel_batchbuffer *batch, if (!(intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS)) { if (intel->ttm == GL_TRUE) { + struct drm_i915_gem_execbuffer *execbuf; + + execbuf = dri_process_relocs(batch->buf); intel_exec_ioctl(batch->intel, used, batch->cliprect_mode != LOOP_CLIPRECTS, allow_unlock, - start, count, &batch->last_fence); + execbuf, &batch->last_fence); } else { + dri_process_relocs(batch->buf); intel_batch_ioctl(batch->intel, batch->buf->offset, used, diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c new file mode 100644 index 0000000000..2d8dced214 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -0,0 +1,818 @@ +/************************************************************************** + * + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström + * Keith Whitwell + * Eric Anholt + * Dave Airlie + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "errno.h" +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "string.h" +#include "imports.h" + +#include "i915_drm.h" + +#include "intel_bufmgr_gem.h" + +#define DBG(...) do { \ + if (bufmgr_gem->bufmgr.debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +struct intel_validate_entry { + dri_bo *bo; + struct drm_i915_op_arg bo_arg; +}; + +struct dri_gem_bo_bucket_entry { + uint32_t gem_handle; + struct dri_gem_bo_bucket_entry *next; +}; + +struct dri_gem_bo_bucket { + struct dri_gem_bo_bucket_entry *head; + struct dri_gem_bo_bucket_entry **tail; + /** + * Limit on the number of entries in this bucket. + * + * 0 means that this caching at this bucket size is disabled. + * -1 means that there is no limit to caching at this size. + */ + int max_entries; + int num_entries; +}; + +/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse + * is 1 << 16 pages, or 256MB. + */ +#define INTEL_GEM_BO_BUCKETS 16 +typedef struct _dri_bufmgr_gem { + dri_bufmgr bufmgr; + + int fd; + + uint32_t max_relocs; + + struct drm_i915_gem_validate_entry *validate_array; + dri_bo **validate_bo; + int validate_array_size; + int validate_count; + + /** Array of lists of cached gem objects of power-of-two sizes */ + struct dri_gem_bo_bucket cache_bucket[INTEL_GEM_BO_BUCKETS]; + + struct drm_i915_gem_execbuffer exec_arg; +} dri_bufmgr_gem; + +typedef struct _dri_bo_gem { + dri_bo bo; + + int refcount; + unsigned int map_count; + uint32_t gem_handle; + const char *name; + + /** + * Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + + /** Array passed to the DRM containing relocation information. */ + struct drm_i915_gem_relocation_entry *relocs; + /** Array of bos corresponding to relocs[i].target_handle */ + dri_bo **reloc_target_bo; + /** Number of entries in relocs */ + int reloc_count; + /** Mapped address for the buffer */ + void *virtual; +} dri_bo_gem; + +typedef struct _dri_fence_gem +{ + dri_fence fence; + + int refcount; + const char *name; + drmFence drm_fence; +} dri_fence_gem; + +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + +static struct dri_gem_bo_bucket * +dri_gem_bo_bucket_for_size(dri_bufmgr_gem *bufmgr_gem, unsigned long size) +{ + int i; + + /* We only do buckets in power of two increments */ + if ((size & (size - 1)) != 0) + return NULL; + + /* We should only see sizes rounded to pages. */ + assert((size % 4096) == 0); + + /* We always allocate in units of pages */ + i = ffs(size / 4096) - 1; + if (i >= INTEL_GEM_BO_BUCKETS) + return NULL; + + return &bufmgr_gem->cache_bucket[i]; +} + + +static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem) +{ + int i, j; + + for (i = 0; i < bufmgr_gem->validate_count; i++) { + dri_bo *bo = bufmgr_gem->validate_bo[i]; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + if (bo_gem->relocs == NULL) { + DBG("%2d: %s\n", i, bo_gem->name); + continue; + } + + for (j = 0; j < bo_gem->reloc_count; j++) { + dri_bo *target_bo = bo_gem->reloc_target_bo[j]; + dri_bo_gem *target_gem = (dri_bo_gem *)target_bo; + + DBG("%2d: %s@0x%08llx -> %s@0x%08lx + 0x%08x\n", + i, + bo_gem->name, bo_gem->relocs[j].offset, + target_gem->name, target_bo->offset, + bo_gem->relocs[j].delta); + } + } +} + +/** + * Adds the given buffer to the list of buffers to be validated (moved into the + * appropriate memory type) with the next batch submission. + * + * If a buffer is validated multiple times in a batch submission, it ends up + * with the intersection of the memory type flags and the union of the + * access flags. + */ +static void +intel_add_validate_buffer(dri_bo *bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + int index; + + if (bo_gem->validate_index != -1) + return; + + /* Extend the array of validation entries as necessary. */ + if (bufmgr_gem->validate_count == bufmgr_gem->validate_array_size) { + int new_size = bufmgr_gem->validate_array_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_gem->validate_array = + realloc(bufmgr_gem->validate_array, + sizeof(*bufmgr_gem->validate_array) * new_size); + bufmgr_gem->validate_bo = + realloc(bufmgr_gem->validate_bo, + sizeof(*bufmgr_gem->validate_bo) * new_size); + bufmgr_gem->validate_array_size = new_size; + } + + index = bufmgr_gem->validate_count; + /* Fill in array entry */ + bufmgr_gem->validate_array[index].buffer_handle = bo_gem->gem_handle; + bufmgr_gem->validate_array[index].relocation_count = bo_gem->reloc_count; + bufmgr_gem->validate_array[index].relocs_ptr = (uintptr_t)bo_gem->relocs; + bufmgr_gem->validate_bo[index] = bo; + dri_bo_reference(bo); + bufmgr_gem->validate_count++; +} + + +#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ + sizeof(uint32_t)) + +static int +intel_setup_reloc_list(dri_bo *bo) +{ + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + + bo_gem->relocs = calloc(bufmgr_gem->max_relocs, + sizeof(struct drm_i915_gem_relocation_entry)); + bo_gem->reloc_target_bo = calloc(1, sizeof(dri_bo *)); + + return 0; +} + +static dri_bo * +dri_gem_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + dri_bo_gem *bo_gem; + unsigned int page_size = getpagesize(); + int ret; + struct dri_gem_bo_bucket *bucket; + GLboolean alloc_from_cache = GL_FALSE; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + /* Round the allocated size up to a power of two number of pages. */ + bo_gem->bo.size = 1 << logbase2(size); + if (bo_gem->bo.size < page_size) + bo_gem->bo.size = page_size; + bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo_gem->bo.size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL || bucket->max_entries == 0) { + bo_gem->bo.size = size; + if (bo_gem->bo.size < page_size) + bo_gem->bo.size = page_size; + } + + /* Get a buffer out of the cache if available */ + if (bucket != NULL && bucket->num_entries > 0) { + struct dri_gem_bo_bucket_entry *entry = bucket->head; +#if 0 + int busy; + + /* XXX */ + /* Check if the buffer is still in flight. If not, reuse it. */ + ret = drmBOBusy(bufmgr_gem->fd, &entry->drm_bo, &busy); + alloc_from_cache = (ret == 0 && busy == 0); +#else + alloc_from_cache = 0; +#endif + + if (alloc_from_cache) { + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + bo_gem->gem_handle = entry->gem_handle; + free(entry); + } + } + + if (!alloc_from_cache) { + struct drm_gem_alloc alloc; + + memset(&alloc, 0, sizeof(alloc)); + alloc.size = bo_gem->bo.size; + + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_ALLOC, &alloc); + bo_gem->gem_handle = alloc.handle; + if (ret != 0) { + free(bo_gem); + return NULL; + } + } + + bo_gem->bo.offset = 0; + bo_gem->bo.virtual = NULL; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->name = name; + bo_gem->refcount = 1; + bo_gem->validate_index = -1; + + DBG("bo_create: %p (%s) %ldb\n", &bo_gem->bo, bo_gem->name, size); + + return &bo_gem->bo; +} + +/* Our GEM backend doesn't allow creation of static buffers, as that requires + * privelege for the non-fake case, and the lock in the fake case where we were + * working around the X Server not creating buffers and passing handles to us. + */ +static dri_bo * +dri_gem_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, void *virtual, + uint64_t location_mask) +{ + return NULL; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +dri_bo * +intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + dri_bo_gem *bo_gem; + int ret; + struct drm_gem_open open_arg; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + memset(&open_arg, 0, sizeof(open_arg)); + open_arg.name = handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg); + if (ret != 0) { + fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", + name, handle, strerror(-ret)); + free(bo_gem); + return NULL; + } + bo_gem->bo.size = open_arg.size; + bo_gem->bo.offset = 0; + bo_gem->bo.virtual = NULL; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->name = name; + bo_gem->refcount = 1; + bo_gem->validate_index = -1; + + DBG("bo_create_from_handle: %p %08x (%s)\n", + &bo_gem->bo, handle, bo_gem->name); + + return &bo_gem->bo; +} + +static void +dri_gem_bo_reference(dri_bo *bo) +{ + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + bo_gem->refcount++; +} + +static void +dri_gem_bo_unreference(dri_bo *bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + if (!bo) + return; + + if (--bo_gem->refcount == 0) { + struct dri_gem_bo_bucket *bucket; + int ret; + + assert(bo_gem->map_count == 0); + + if (bo_gem->relocs != NULL) { + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < bo_gem->reloc_count; i++) + dri_bo_unreference(bo_gem->reloc_target_bo[i]); + free(bo_gem->reloc_target_bo); + free(bo_gem->relocs); + } + + bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo->size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (bucket != NULL && + (bucket->max_entries == -1 || + (bucket->max_entries > 0 && + bucket->num_entries < bucket->max_entries))) + { + struct dri_gem_bo_bucket_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->gem_handle = bo_gem->gem_handle; + + entry->next = NULL; + *bucket->tail = entry; + bucket->tail = &entry->next; + bucket->num_entries++; + } else { + struct drm_gem_unreference unref; + + /* Decrement the kernel refcount for the buffer. */ + unref.handle = bo_gem->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_UNREFERENCE, &unref); + if (ret != 0) { + fprintf(stderr, "DRM_IOCTL_GEM_UNREFERENCE failed (%s): %s\n", + bo_gem->name, strerror(-ret)); + } + } + + DBG("bo_unreference final: %p (%s)\n", &bo_gem->bo, bo_gem->name); + + free(bo); + return; + } +} + +static int +dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) +{ + dri_bufmgr_gem *bufmgr_gem; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + int ret; + + bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops. + */ + if (bo_gem->map_count++ != 0) + return 0; + + assert(bo->virtual == NULL); + + DBG("bo_map: %p (%s)\n", &bo_gem->bo, bo_gem->name); + + if (bo_gem->virtual == NULL) { + struct drm_gem_mmap mmap_arg; + + mmap_arg.handle = bo_gem->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", + __FILE__, __LINE__, bo_gem->name, strerror(-ret)); + } + bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; + } + + /* XXX Synchronization with hardware */ + + bo->virtual = bo_gem->virtual; + + return 0; +} + +static int +dri_gem_bo_unmap(dri_bo *bo) +{ + dri_bufmgr_gem *bufmgr_gem; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + if (bo == NULL) + return 0; + + assert(bo_gem->map_count != 0); + if (--bo_gem->map_count != 0) + return 0; + + bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + + assert(bo->virtual != NULL); + + DBG("bo_unmap: %p (%s)\n", &bo_gem->bo, bo_gem->name); + + munmap(bo_gem->virtual, bo->size); + bo_gem->virtual = NULL; + bo->virtual = NULL; + + return 0; +} + +static void +dri_gem_fence_reference(dri_fence *fence) +{ + dri_fence_gem *fence_gem = (dri_fence_gem *)fence; + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr; + + ++fence_gem->refcount; + DBG("fence_reference: %p (%s)\n", &fence_gem->fence, fence_gem->name); +} + +static void +dri_gem_fence_unreference(dri_fence *fence) +{ + dri_fence_gem *fence_gem = (dri_fence_gem *)fence; + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr; + + if (!fence) + return; + + DBG("fence_unreference: %p (%s)\n", &fence_gem->fence, fence_gem->name); + + if (--fence_gem->refcount == 0) { + int ret; + + ret = drmFenceUnreference(bufmgr_gem->fd, &fence_gem->drm_fence); + if (ret != 0) { + fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", + fence_gem->name, strerror(-ret)); + } + + free(fence); + return; + } +} + +static void +dri_gem_fence_wait(dri_fence *fence) +{ + dri_fence_gem *fence_gem = (dri_fence_gem *)fence; + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr; + int ret; + + ret = drmFenceWait(bufmgr_gem->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_gem->drm_fence, 0); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", + __FILE__, __LINE__, fence_gem->name, strerror(-ret)); + abort(); + } + + DBG("fence_wait: %p (%s)\n", &fence_gem->fence, fence_gem->name); +} + +static void +dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + int i; + + free(bufmgr_gem->validate_array); + free(bufmgr_gem->validate_bo); + + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { + struct dri_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i]; + struct dri_gem_bo_bucket_entry *entry; + + while ((entry = bucket->head) != NULL) { + struct drm_gem_unreference unref; + int ret; + + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + /* Decrement the kernel refcount for the buffer. */ + unref.handle = entry->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_UNREFERENCE, &unref); + if (ret != 0) { + fprintf(stderr, "DRM_IOCTL_GEM_UNREFERENCE failed: %s\n", + strerror(-ret)); + } + + free(entry); + } + } + + free(bufmgr); +} + +/** + * Adds the target buffer to the validation list and adds the relocation + * to the reloc_buffer's relocation list. + * + * The relocation entry at the given offset must already contain the + * precomputed relocation value, because the kernel will optimize out + * the relocation entry write when the buffer hasn't moved from the + * last known offset in target_bo. + */ +static int +dri_gem_emit_reloc(dri_bo *bo, uint64_t flags, GLuint delta, + GLuint offset, dri_bo *target_bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + dri_bo_gem *target_bo_gem = (dri_bo_gem *)target_bo; + + /* Create a new relocation list if needed */ + if (bo_gem->relocs == NULL) + intel_setup_reloc_list(bo); + + /* Check overflow */ + assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); + + bo_gem->relocs[bo_gem->reloc_count].offset = offset; + bo_gem->relocs[bo_gem->reloc_count].delta = delta; + bo_gem->relocs[bo_gem->reloc_count].target_handle = + target_bo_gem->gem_handle; + + bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; + dri_bo_reference(target_bo); + + bo_gem->reloc_count++; + return 0; +} + +/** + * Walk the tree of relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +dri_gem_bo_process_reloc(dri_bo *bo) +{ + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + int i; + + if (bo_gem->relocs == NULL) + return; + + for (i = 0; i < bo_gem->reloc_count; i++) { + dri_bo *target_bo = bo_gem->reloc_target_bo[i]; + + /* Continue walking the tree depth-first. */ + dri_gem_bo_process_reloc(target_bo); + + /* Add the target to the validate list */ + intel_add_validate_buffer(target_bo); + } +} + +static void * +dri_gem_process_reloc(dri_bo *batch_buf) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr; + + /* Update indices and set up the validate list. */ + dri_gem_bo_process_reloc(batch_buf); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. + */ + intel_add_validate_buffer(batch_buf); + + bufmgr_gem->exec_arg.buffers_ptr = (uintptr_t)bufmgr_gem->validate_array; + bufmgr_gem->exec_arg.buffer_count = bufmgr_gem->validate_count; + bufmgr_gem->exec_arg.batch_start_offset = bufmgr_gem->validate_count; + + return &bufmgr_gem->exec_arg; +} + +static void +intel_update_buffer_offsets (dri_bufmgr_gem *bufmgr_gem) +{ + int i; + + for (i = 0; i < bufmgr_gem->validate_count; i++) { + dri_bo *bo = bufmgr_gem->validate_bo[i]; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + /* Update the buffer offset */ + if (bufmgr_gem->validate_array[i].buffer_offset != bo->offset) { + DBG("BO %s migrated: 0x%08lx -> 0x%08llx\n", + bo_gem->name, bo->offset, + bufmgr_gem->validate_array[i].buffer_offset); + bo->offset = bufmgr_gem->validate_array[i].buffer_offset; + } + } +} + +static void +dri_gem_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr; + int i; + + intel_update_buffer_offsets (bufmgr_gem); + + if (bufmgr_gem->bufmgr.debug) + dri_gem_dump_validation_list(bufmgr_gem); + + for (i = 0; i < bufmgr_gem->validate_count; i++) { + dri_bo *bo = bufmgr_gem->validate_bo[i]; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + dri_bo_unreference(bo); + bufmgr_gem->validate_bo[i] = NULL; + } + bufmgr_gem->validate_count = 0; +} + +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr) +{ + /* + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + int i; + + for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { + bufmgr_gem->cache_bucket[i].max_entries = -1; + } + */ +} + +/* + * + */ +static int +dri_gem_check_aperture_space(dri_bo *bo) +{ + return 0; +} + +/** + * Initializes the GEM buffer manager, which uses the kernel to allocate, map, + * and manage map buffer objections. + * + * \param fd File descriptor of the opened DRM device. + * \param fence_type Driver-specific fence type used for fences with no flush. + * \param fence_type_flush Driver-specific fence type used for fences with a + * flush. + */ +dri_bufmgr * +intel_bufmgr_gem_init(int fd, int batch_size) +{ + dri_bufmgr_gem *bufmgr_gem; + int i; + + bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); + bufmgr_gem->fd = fd; + + /* Let's go with one relocation per every 2 dwords (but round down a bit + * since a power of two will mean an extra page allocation for the reloc + * buffer). + * + * Every 4 was too few for the blender benchmark. + */ + bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; + + bufmgr_gem->bufmgr.bo_alloc = dri_gem_alloc; + bufmgr_gem->bufmgr.bo_alloc_static = dri_gem_alloc_static; + bufmgr_gem->bufmgr.bo_reference = dri_gem_bo_reference; + bufmgr_gem->bufmgr.bo_unreference = dri_gem_bo_unreference; + bufmgr_gem->bufmgr.bo_map = dri_gem_bo_map; + bufmgr_gem->bufmgr.bo_unmap = dri_gem_bo_unmap; + bufmgr_gem->bufmgr.fence_reference = dri_gem_fence_reference; + bufmgr_gem->bufmgr.fence_unreference = dri_gem_fence_unreference; + bufmgr_gem->bufmgr.fence_wait = dri_gem_fence_wait; + bufmgr_gem->bufmgr.destroy = dri_bufmgr_gem_destroy; + bufmgr_gem->bufmgr.emit_reloc = dri_gem_emit_reloc; + bufmgr_gem->bufmgr.process_relocs = dri_gem_process_reloc; + bufmgr_gem->bufmgr.post_submit = dri_gem_post_submit; + bufmgr_gem->bufmgr.debug = GL_FALSE; + bufmgr_gem->bufmgr.check_aperture_space = dri_gem_check_aperture_space; + /* Initialize the linked lists for BO reuse cache. */ + for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) + bufmgr_gem->cache_bucket[i].tail = &bufmgr_gem->cache_bucket[i].head; + + return &bufmgr_gem->bufmgr; +} + diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h new file mode 100644 index 0000000000..a28f5ae814 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h @@ -0,0 +1,19 @@ + +#ifndef INTEL_BUFMGR_GEM_H +#define INTEL_BUFMGR_GEM_H + +#include "dri_bufmgr.h" + +extern dri_bo *intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle); + +dri_fence *intel_gem_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, + drm_fence_arg_t *arg); + + +dri_bufmgr *intel_bufmgr_gem_init(int fd, int batch_size); + +void +intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr); + +#endif /* INTEL_BUFMGR_GEM_H */ diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index 66e36102b9..f9624a6abe 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -151,9 +151,9 @@ void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence) + struct drm_i915_gem_execbuffer *execbuf, + dri_fence **fence) { - struct drm_i915_execbuffer execbuf; dri_fence *fo; int ret; @@ -169,16 +169,13 @@ intel_exec_ioctl(struct intel_context *intel, memset(&execbuf, 0, sizeof(execbuf)); - execbuf.num_buffers = count; - execbuf.batch.used = used; - execbuf.batch.cliprects = intel->pClipRects; - execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - execbuf.batch.DR1 = 0; - execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); - - execbuf.ops_list = (unsigned long)start; // TODO - execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED; + execbuf->batch_start_offset = 0; + execbuf->batch_len = used; + execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects; + execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + execbuf->DR1 = 0; + execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); do { ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, @@ -191,17 +188,6 @@ intel_exec_ioctl(struct intel_context *intel, exit(1); } - if (execbuf.fence_arg.error != 0) { - - /* - * Fence creation has failed, but the GPU has been - * idled by the kernel. Safe to continue. - */ - - *fence = NULL; - return; - } - fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", &execbuf.fence_arg); if (!fo) { diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 8674aef723..7691a27f92 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -41,6 +41,7 @@ void intel_batch_ioctl( struct intel_context *intel, void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - void *start, GLuint count, dri_fence **fence); + struct drm_i915_gem_execbuffer *execbuf, + dri_fence **fence); #endif -- cgit v1.2.3 From eb10cdc838fc31ea2cf59f556f6f7d8b072f5bae Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 May 2008 14:11:19 -0700 Subject: [intel] Fix build for GEM. TTM is now disabled, and fencing is gone. Fencing was used in two places: ensuring that we didn't get too many frames ahead of ourselves, and glFinish. glFinish will be satisfied by waiting on buffers like we would do for CPU access on them. The "don't get too far ahead" is now the responsibility of the execution manager (kernel). --- src/mesa/drivers/dri/common/dri_bufmgr.c | 25 +------- src/mesa/drivers/dri/common/dri_bufmgr.h | 41 ++----------- src/mesa/drivers/dri/common/dri_bufmgr_fake.c | 79 ++------------------------ src/mesa/drivers/dri/i915/Makefile | 2 +- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 28 +++------ src/mesa/drivers/dri/intel/intel_batchbuffer.h | 3 - src/mesa/drivers/dri/intel/intel_blit.c | 13 ----- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 69 +--------------------- src/mesa/drivers/dri/intel/intel_bufmgr_gem.h | 7 +-- src/mesa/drivers/dri/intel/intel_context.c | 56 +++++++----------- src/mesa/drivers/dri/intel/intel_context.h | 3 - src/mesa/drivers/dri/intel/intel_ioctl.c | 19 +------ src/mesa/drivers/dri/intel/intel_ioctl.h | 3 +- src/mesa/drivers/dri/intel/intel_regions.c | 6 +- src/mesa/drivers/dri/intel/intel_screen.c | 2 +- 15 files changed, 55 insertions(+), 301 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 70ae091499..69868b6665 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -90,27 +90,6 @@ dri_bo_unmap(dri_bo *buf) return buf->bufmgr->bo_unmap(buf); } -void -dri_fence_wait(dri_fence *fence) -{ - fence->bufmgr->fence_wait(fence); -} - -void -dri_fence_reference(dri_fence *fence) -{ - fence->bufmgr->fence_reference(fence); -} - -void -dri_fence_unreference(dri_fence *fence) -{ - if (fence == NULL) - return; - - fence->bufmgr->fence_unreference(fence); -} - void dri_bo_subdata(dri_bo *bo, unsigned long offset, unsigned long size, const void *data) @@ -153,9 +132,9 @@ void *dri_process_relocs(dri_bo *batch_buf) return batch_buf->bufmgr->process_relocs(batch_buf); } -void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +void dri_post_submit(dri_bo *batch_buf) { - batch_buf->bufmgr->post_submit(batch_buf, last_fence); + batch_buf->bufmgr->post_submit(batch_buf); } void diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index cbfeb9136d..dffeb4c601 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -38,7 +38,6 @@ typedef struct _dri_bufmgr dri_bufmgr; typedef struct _dri_bo dri_bo; -typedef struct _dri_fence dri_fence; struct _dri_bo { /** @@ -61,18 +60,6 @@ struct _dri_bo { dri_bufmgr *bufmgr; }; -struct _dri_fence { - /** - * This is an ORed mask of DRM_BO_FLAG_READ, DRM_BO_FLAG_WRITE, and - * DRM_FLAG_EXE indicating the operations associated with this fence. - * - * It is constant for the life of the fence object. - */ - unsigned int type; - /** Buffer manager context associated with this fence */ - dri_bufmgr *bufmgr; -}; - /** * Context for a buffer manager instance. * @@ -113,28 +100,15 @@ struct _dri_bufmgr { /** * Maps the buffer into userspace. * - * This function will block waiting for any existing fence on the buffer to - * clear, first. The resulting mapping is available at buf->virtual. -\ */ + * This function will block waiting for any existing execution on the + * buffer to complete, first. The resulting mapping is available at + * buf->virtual. + */ int (*bo_map)(dri_bo *buf, GLboolean write_enable); /** Reduces the refcount on the userspace mapping of the buffer object. */ int (*bo_unmap)(dri_bo *buf); - /** Takes a reference on a fence object */ - void (*fence_reference)(dri_fence *fence); - - /** - * Releases a reference on a fence object, freeing the data if - * rerefences remain. - */ - void (*fence_unreference)(dri_fence *fence); - - /** - * Blocks until the given fence is signaled. - */ - void (*fence_wait)(dri_fence *fence); - /** * Tears down the buffer manager instance. */ @@ -179,7 +153,7 @@ struct _dri_bufmgr { */ void *(*process_relocs)(dri_bo *batch_buf); - void (*post_submit)(dri_bo *batch_buf, dri_fence **fence); + void (*post_submit)(dri_bo *batch_buf); int (*check_aperture_space)(dri_bo *bo); GLboolean debug; /**< Enables verbose debugging printouts */ @@ -194,9 +168,6 @@ void dri_bo_reference(dri_bo *bo); void dri_bo_unreference(dri_bo *bo); int dri_bo_map(dri_bo *buf, GLboolean write_enable); int dri_bo_unmap(dri_bo *buf); -void dri_fence_wait(dri_fence *fence); -void dri_fence_reference(dri_fence *fence); -void dri_fence_unreference(dri_fence *fence); void dri_bo_subdata(dri_bo *bo, unsigned long offset, unsigned long size, const void *data); @@ -221,7 +192,7 @@ int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, GLuint offset, dri_bo *target_buf); void *dri_process_relocs(dri_bo *batch_buf); void dri_post_process_relocs(dri_bo *batch_buf); -void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence); +void dri_post_submit(dri_bo *batch_buf); int dri_bufmgr_check_aperture_space(dri_bo *bo); #endif diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c index 9dd06b07eb..fc52674839 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c @@ -170,15 +170,6 @@ typedef struct _dri_bo_fake { void *invalidate_ptr; } dri_bo_fake; -typedef struct _dri_fence_fake { - dri_fence fence; - - const char *name; - unsigned int refcount; - unsigned int fence_cookie; - GLboolean flushed; -} dri_fence_fake; - static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, unsigned int fence_cookie); @@ -898,63 +889,16 @@ dri_fake_bo_validate(dri_bo *bo, uint64_t flags) return 0; } -static dri_fence * -dri_fake_fence_validated(dri_bufmgr *bufmgr, const char *name, - GLboolean flushed) +static void +dri_fake_fence_validated(dri_bufmgr *bufmgr) { - dri_fence_fake *fence_fake; dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; unsigned int cookie; - fence_fake = malloc(sizeof(*fence_fake)); - if (!fence_fake) - return NULL; - - fence_fake->refcount = 1; - fence_fake->name = name; - fence_fake->flushed = flushed; - fence_fake->fence.bufmgr = bufmgr; - cookie = _fence_emit_internal(bufmgr_fake); - fence_fake->fence_cookie = cookie; fence_blocks(bufmgr_fake, cookie); - DBG("drm_fence_validated: 0x%08x cookie\n", fence_fake->fence_cookie); - - return &fence_fake->fence; -} - -static void -dri_fake_fence_reference(dri_fence *fence) -{ - dri_fence_fake *fence_fake = (dri_fence_fake *)fence; - - ++fence_fake->refcount; -} - -static void -dri_fake_fence_unreference(dri_fence *fence) -{ - dri_fence_fake *fence_fake = (dri_fence_fake *)fence; - - if (!fence) - return; - - if (--fence_fake->refcount == 0) { - free(fence); - return; - } -} - -static void -dri_fake_fence_wait(dri_fence *fence) -{ - dri_fence_fake *fence_fake = (dri_fence_fake *)fence; - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)fence->bufmgr; - - DBG("drm_fence_wait: 0x%08x cookie\n", fence_fake->fence_cookie); - - _fence_wait_internal(bufmgr_fake, fence_fake->fence_cookie); + DBG("drm_fence_validated: 0x%08x cookie\n", cookie); } static void @@ -1156,19 +1100,9 @@ dri_bo_fake_post_submit(dri_bo *bo) static void -dri_fake_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +dri_fake_post_submit(dri_bo *batch_buf) { - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; - dri_fence *fo; - - fo = dri_fake_fence_validated(batch_buf->bufmgr, "Batch fence", GL_TRUE); - - if (bufmgr_fake->performed_rendering) { - dri_fence_unreference(*last_fence); - *last_fence = fo; - } else { - dri_fence_unreference(fo); - } + dri_fake_fence_validated(batch_buf->bufmgr); dri_bo_fake_post_submit(batch_buf); } @@ -1224,9 +1158,6 @@ dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, bufmgr_fake->bufmgr.bo_unreference = dri_fake_bo_unreference; bufmgr_fake->bufmgr.bo_map = dri_fake_bo_map; bufmgr_fake->bufmgr.bo_unmap = dri_fake_bo_unmap; - bufmgr_fake->bufmgr.fence_wait = dri_fake_fence_wait; - bufmgr_fake->bufmgr.fence_reference = dri_fake_fence_reference; - bufmgr_fake->bufmgr.fence_unreference = dri_fake_fence_unreference; bufmgr_fake->bufmgr.destroy = dri_fake_destroy; bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 7ef055ccad..ed23410697 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -52,7 +52,7 @@ DRIVER_SOURCES = \ intel_tris.c \ intel_fbo.c \ intel_depthstencil.c \ - intel_bufmgr_ttm.c + intel_bufmgr_gem.c C_SOURCES = \ $(COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index c5b0f531d4..683d06a552 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -99,7 +99,6 @@ intel_batchbuffer_alloc(struct intel_context *intel) struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); batch->intel = intel; - batch->last_fence = NULL; intel_batchbuffer_reset(batch); return batch; @@ -108,11 +107,6 @@ intel_batchbuffer_alloc(struct intel_context *intel) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - if (batch->last_fence) { - dri_fence_wait(batch->last_fence); - dri_fence_unreference(batch->last_fence); - batch->last_fence = NULL; - } if (batch->map) { dri_bo_unmap(batch->buf); batch->map = NULL; @@ -152,7 +146,7 @@ do_flush_locked(struct intel_batchbuffer *batch, used, batch->cliprect_mode != LOOP_CLIPRECTS, allow_unlock, - execbuf, &batch->last_fence); + execbuf); } else { dri_process_relocs(batch->buf); intel_batch_ioctl(batch->intel, @@ -162,8 +156,8 @@ do_flush_locked(struct intel_batchbuffer *batch, allow_unlock); } } - - dri_post_submit(batch->buf, &batch->last_fence); + + dri_post_submit(batch->buf); if (intel->numClipRects == 0 && batch->cliprect_mode == LOOP_CLIPRECTS) { @@ -243,9 +237,13 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, UNLOCK_HARDWARE(intel); if (INTEL_DEBUG & DEBUG_SYNC) { + int irq; + fprintf(stderr, "waiting for idle\n"); - if (batch->last_fence != NULL) - dri_fence_wait(batch->last_fence); + LOCK_HARDWARE(intel); + irq = intelEmitIrqLocked(intel); + UNLOCK_HARDWARE(intel); + intelWaitIrq(intel, irq); } /* Reset the buffer: @@ -253,14 +251,6 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, intel_batchbuffer_reset(batch); } -void -intel_batchbuffer_finish(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_flush(batch); - if (batch->last_fence != NULL) - dri_fence_wait(batch->last_fence); -} - /* This is the only way buffers get added to the validate list. */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 2d636df2ce..feddfb46df 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -40,7 +40,6 @@ struct intel_batchbuffer struct intel_context *intel; dri_bo *buf; - dri_fence *last_fence; GLubyte *map; GLubyte *ptr; @@ -58,8 +57,6 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context void intel_batchbuffer_free(struct intel_batchbuffer *batch); -void intel_batchbuffer_finish(struct intel_batchbuffer *batch); - void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, int line); diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 4890826a19..b7d36d8cd6 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -66,14 +66,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, intelScreen = intel->intelScreen; - if (intel->last_swap_fence) { - dri_fence_wait(intel->last_swap_fence); - dri_fence_unreference(intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - intel->last_swap_fence = intel->first_swap_fence; - intel->first_swap_fence = NULL; - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets * should work regardless. */ @@ -163,12 +155,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, ADVANCE_BATCH(); } - if (intel->first_swap_fence) - dri_fence_unreference(intel->first_swap_fence); intel_batchbuffer_flush(intel->batch); - intel->first_swap_fence = intel->batch->last_fence; - if (intel->first_swap_fence) - dri_fence_reference(intel->first_swap_fence); } UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 2d8dced214..07f782ca3a 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -127,15 +127,6 @@ typedef struct _dri_bo_gem { void *virtual; } dri_bo_gem; -typedef struct _dri_fence_gem -{ - dri_fence fence; - - int refcount; - const char *name; - drmFence drm_fence; -} dri_fence_gem; - static int logbase2(int n) { @@ -526,58 +517,6 @@ dri_gem_bo_unmap(dri_bo *bo) return 0; } -static void -dri_gem_fence_reference(dri_fence *fence) -{ - dri_fence_gem *fence_gem = (dri_fence_gem *)fence; - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr; - - ++fence_gem->refcount; - DBG("fence_reference: %p (%s)\n", &fence_gem->fence, fence_gem->name); -} - -static void -dri_gem_fence_unreference(dri_fence *fence) -{ - dri_fence_gem *fence_gem = (dri_fence_gem *)fence; - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr; - - if (!fence) - return; - - DBG("fence_unreference: %p (%s)\n", &fence_gem->fence, fence_gem->name); - - if (--fence_gem->refcount == 0) { - int ret; - - ret = drmFenceUnreference(bufmgr_gem->fd, &fence_gem->drm_fence); - if (ret != 0) { - fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", - fence_gem->name, strerror(-ret)); - } - - free(fence); - return; - } -} - -static void -dri_gem_fence_wait(dri_fence *fence) -{ - dri_fence_gem *fence_gem = (dri_fence_gem *)fence; - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)fence->bufmgr; - int ret; - - ret = drmFenceWait(bufmgr_gem->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_gem->drm_fence, 0); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", - __FILE__, __LINE__, fence_gem->name, strerror(-ret)); - abort(); - } - - DBG("fence_wait: %p (%s)\n", &fence_gem->fence, fence_gem->name); -} - static void dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) { @@ -717,7 +656,7 @@ intel_update_buffer_offsets (dri_bufmgr_gem *bufmgr_gem) } static void -dri_gem_post_submit(dri_bo *batch_buf, dri_fence **last_fence) +dri_gem_post_submit(dri_bo *batch_buf) { dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr; int i; @@ -773,9 +712,6 @@ dri_gem_check_aperture_space(dri_bo *bo) * and manage map buffer objections. * * \param fd File descriptor of the opened DRM device. - * \param fence_type Driver-specific fence type used for fences with no flush. - * \param fence_type_flush Driver-specific fence type used for fences with a - * flush. */ dri_bufmgr * intel_bufmgr_gem_init(int fd, int batch_size) @@ -800,9 +736,6 @@ intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.bo_unreference = dri_gem_bo_unreference; bufmgr_gem->bufmgr.bo_map = dri_gem_bo_map; bufmgr_gem->bufmgr.bo_unmap = dri_gem_bo_unmap; - bufmgr_gem->bufmgr.fence_reference = dri_gem_fence_reference; - bufmgr_gem->bufmgr.fence_unreference = dri_gem_fence_unreference; - bufmgr_gem->bufmgr.fence_wait = dri_gem_fence_wait; bufmgr_gem->bufmgr.destroy = dri_bufmgr_gem_destroy; bufmgr_gem->bufmgr.emit_reloc = dri_gem_emit_reloc; bufmgr_gem->bufmgr.process_relocs = dri_gem_process_reloc; diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h index a28f5ae814..36caeba214 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h @@ -4,13 +4,10 @@ #include "dri_bufmgr.h" -extern dri_bo *intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, +extern dri_bo *intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, + const char *name, unsigned int handle); -dri_fence *intel_gem_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, - drm_fence_arg_t *arg); - - dri_bufmgr *intel_bufmgr_gem_init(int fd, int batch_size); void diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 47e7d1afc2..4a1e1a9ac0 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -59,7 +59,7 @@ #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr_gem.h" #include "drirenderbuffer.h" #include "vblank.h" @@ -368,12 +368,16 @@ intelFlush(GLcontext * ctx) void intelFinish(GLcontext * ctx) { - struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + int i; + intelFlush(ctx); - if (intel->batch->last_fence) { - dri_fence_wait(intel->batch->last_fence); - dri_fence_unreference(intel->batch->last_fence); - intel->batch->last_fence = NULL; + + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + /* XXX: Wait on buffer idle */ + } + if (fb->_DepthBuffer) { + /* XXX: Wait on buffer idle */ } } @@ -439,28 +443,25 @@ static GLboolean intel_init_bufmgr(struct intel_context *intel) { intelScreenPrivate *intelScreen = intel->intelScreen; - GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL; - GLboolean ttm_supported; + GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL; + GLboolean gem_supported; - /* If we've got a new enough DDX that's initializing TTM and giving us + /* If we've got a new enough DDX that's initializing GEM and giving us * object handles for the shared buffers, use that. */ intel->ttm = GL_FALSE; if (intel->intelScreen->driScrnPriv->dri2.enabled) - ttm_supported = GL_TRUE; + gem_supported = GL_TRUE; else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 && intel->intelScreen->drmMinor >= 11 && intel->intelScreen->front.bo_handle != -1) - ttm_supported = GL_TRUE; + gem_supported = GL_TRUE; else - ttm_supported = GL_FALSE; + gem_supported = GL_FALSE; - if (!ttm_disable && ttm_supported) { + if (!gem_disable && gem_supported) { int bo_reuse_mode; - intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd, - DRM_FENCE_TYPE_EXE, - DRM_FENCE_TYPE_EXE | - DRM_I915_FENCE_TYPE_RW, + intel->bufmgr = intel_bufmgr_gem_init(intel->driFd, BATCH_SZ); if (intel->bufmgr != NULL) intel->ttm = GL_TRUE; @@ -470,16 +471,16 @@ intel_init_bufmgr(struct intel_context *intel) case DRI_CONF_BO_REUSE_DISABLED: break; case DRI_CONF_BO_REUSE_ALL: - intel_ttm_enable_bo_reuse(intel->bufmgr); + intel_gem_enable_bo_reuse(intel->bufmgr); break; } } /* Otherwise, use the classic buffer manager. */ if (intel->bufmgr == NULL) { - if (ttm_disable) { - fprintf(stderr, "TTM buffer manager disabled. Using classic.\n"); + if (gem_disable) { + fprintf(stderr, "GEM disabled. Using classic.\n"); } else { - fprintf(stderr, "Failed to initialize TTM buffer manager. " + fprintf(stderr, "Failed to initialize GEM. " "Falling back to classic.\n"); } @@ -663,8 +664,6 @@ intelInitContext(struct intel_context *intel, intel_recreate_static_regions(intel); intel->batch = intel_batchbuffer_alloc(intel); - intel->last_swap_fence = NULL; - intel->first_swap_fence = NULL; intel_bufferobj_init(intel); intel_fbo_init(intel); @@ -718,17 +717,6 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel_batchbuffer_free(intel->batch); - if (intel->last_swap_fence) { - dri_fence_wait(intel->last_swap_fence); - dri_fence_unreference(intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - if (intel->first_swap_fence) { - dri_fence_wait(intel->first_swap_fence); - dri_fence_unreference(intel->first_swap_fence); - intel->first_swap_fence = NULL; - } - if (release_texture_heaps) { /* This share group is about to go away, free our private * texture object data. diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 1348b0adcf..35ef22aa27 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -174,9 +174,6 @@ struct intel_context */ GLboolean ttm; - dri_fence *last_swap_fence; - dri_fence *first_swap_fence; - struct intel_batchbuffer *batch; GLboolean no_batch_wrap; unsigned batch_id; diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index f9624a6abe..2f4caf9535 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -43,7 +43,7 @@ #include "drm.h" #include "i915_drm.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr_gem.h" #define FILE_DEBUG_FLAG DEBUG_IOCTL @@ -151,10 +151,8 @@ void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf, - dri_fence **fence) + struct drm_i915_gem_execbuffer *execbuf) { - dri_fence *fo; int ret; assert(intel->locked); @@ -163,10 +161,6 @@ intel_exec_ioctl(struct intel_context *intel, if (intel->no_hw) return; - if (*fence) { - dri_fence_unreference(*fence); - } - memset(&execbuf, 0, sizeof(execbuf)); execbuf->batch_start_offset = 0; @@ -187,13 +181,4 @@ intel_exec_ioctl(struct intel_context *intel, UNLOCK_HARDWARE(intel); exit(1); } - - fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers", - &execbuf.fence_arg); - if (!fo) { - fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle); - UNLOCK_HARDWARE(intel); - exit(1); - } - *fence = fo; } diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 7691a27f92..52b0ab6102 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -41,7 +41,6 @@ void intel_batch_ioctl( struct intel_context *intel, void intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf, - dri_fence **fence); + struct drm_i915_gem_execbuffer *execbuf); #endif diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 8bc548913f..7d78e4eca7 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -44,7 +44,7 @@ #include "intel_blit.h" #include "intel_buffer_objects.h" #include "dri_bufmgr.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr_gem.h" #include "intel_batchbuffer.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -121,7 +121,7 @@ intel_region_alloc_for_handle(struct intel_context *intel, { dri_bo *buffer; - buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle); + buffer = intel_gem_bo_create_from_handle(intel->bufmgr, "region", handle); return intel_region_alloc_internal(intel, cpp, pitch, height, tiled, buffer); @@ -440,7 +440,7 @@ intel_recreate_static(struct intel_context *intel, if (intel->ttm) { assert(region_desc->bo_handle != -1); - region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, + region->buffer = intel_gem_bo_create_from_handle(intel->bufmgr, name, region_desc->bo_handle); } else { diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 52e062eece..7e0713c4f4 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,7 +49,7 @@ #include "i830_dri.h" #include "intel_regions.h" #include "intel_batchbuffer.h" -#include "intel_bufmgr_ttm.h" +#include "intel_bufmgr_gem.h" PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN -- cgit v1.2.3 From ef33e76cebed39551aabce397d165d3990ba517c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 May 2008 17:00:47 -0700 Subject: Minor fixups to get GEM to the point of execbuf ioctl. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 19 +++++++++++-------- src/mesa/drivers/dri/intel/intel_ioctl.c | 2 -- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 07f782ca3a..a18ccd1637 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -250,9 +250,9 @@ intel_setup_reloc_list(dri_bo *bo) } static dri_bo * -dri_gem_alloc(dri_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask) +dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) { dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; dri_bo_gem *bo_gem; @@ -336,9 +336,9 @@ dri_gem_alloc(dri_bufmgr *bufmgr, const char *name, * working around the X Server not creating buffers and passing handles to us. */ static dri_bo * -dri_gem_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, void *virtual, - uint64_t location_mask) +dri_gem_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, void *virtual, + uint64_t location_mask) { return NULL; } @@ -475,7 +475,10 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) if (bo_gem->virtual == NULL) { struct drm_gem_mmap mmap_arg; + memset(&mmap_arg, 0, sizeof(mmap_arg)); mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.offset = 0; + mmap_arg.size = bo->size; ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg); if (ret != 0) { fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", @@ -730,8 +733,8 @@ intel_bufmgr_gem_init(int fd, int batch_size) */ bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; - bufmgr_gem->bufmgr.bo_alloc = dri_gem_alloc; - bufmgr_gem->bufmgr.bo_alloc_static = dri_gem_alloc_static; + bufmgr_gem->bufmgr.bo_alloc = dri_gem_bo_alloc; + bufmgr_gem->bufmgr.bo_alloc_static = dri_gem_bo_alloc_static; bufmgr_gem->bufmgr.bo_reference = dri_gem_bo_reference; bufmgr_gem->bufmgr.bo_unreference = dri_gem_bo_unreference; bufmgr_gem->bufmgr.bo_map = dri_gem_bo_map; diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index 2f4caf9535..ec1dec0b1e 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -161,8 +161,6 @@ intel_exec_ioctl(struct intel_context *intel, if (intel->no_hw) return; - memset(&execbuf, 0, sizeof(execbuf)); - execbuf->batch_start_offset = 0; execbuf->batch_len = used; execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects; -- cgit v1.2.3 From 7349f218b47b21595a13103aaa45ddbfdc14dd13 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 May 2008 17:13:45 -0700 Subject: Fix to use GEM execbuf instead of TTM. --- src/mesa/drivers/dri/intel/intel_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index ec1dec0b1e..b84cb64633 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -170,12 +170,12 @@ intel_exec_ioctl(struct intel_context *intel, (((GLuint) intel->drawY) << 16)); do { - ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf, + ret = drmCommandWriteRead(intel->driFd, DRM_I915_GEM_EXECBUFFER, &execbuf, sizeof(execbuf)); } while (ret == -EAGAIN); if (ret != 0) { - fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno); + fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", -errno); UNLOCK_HARDWARE(intel); exit(1); } -- cgit v1.2.3 From 3d19a095cda30ac8abdbe26cd3b664a4b97c899b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 May 2008 18:24:13 -0700 Subject: Fix GEM execbuf ioctl argument. --- src/mesa/drivers/dri/intel/intel_ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index b84cb64633..317ff2c440 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "mtypes.h" #include "context.h" @@ -170,8 +172,7 @@ intel_exec_ioctl(struct intel_context *intel, (((GLuint) intel->drawY) << 16)); do { - ret = drmCommandWriteRead(intel->driFd, DRM_I915_GEM_EXECBUFFER, &execbuf, - sizeof(execbuf)); + ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_EXECBUFFER, execbuf); } while (ret == -EAGAIN); if (ret != 0) { -- cgit v1.2.3 From 81ec0545c93d57f72cff5099c6a34f04e9257a38 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 2 May 2008 18:25:00 -0700 Subject: Don't forget to set handle of shared buffers. (And fix a nearby whitespace nit) --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index a18ccd1637..37e75081ec 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -227,6 +227,7 @@ intel_add_validate_buffer(dri_bo *bo) bufmgr_gem->validate_array[index].buffer_handle = bo_gem->gem_handle; bufmgr_gem->validate_array[index].relocation_count = bo_gem->reloc_count; bufmgr_gem->validate_array[index].relocs_ptr = (uintptr_t)bo_gem->relocs; + bufmgr_gem->validate_array[index].alignment = 0; bufmgr_gem->validate_bo[index] = bo; dri_bo_reference(bo); bufmgr_gem->validate_count++; @@ -366,7 +367,7 @@ intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, open_arg.name = handle; ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg); if (ret != 0) { - fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", + fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", name, handle, strerror(-ret)); free(bo_gem); return NULL; @@ -378,6 +379,7 @@ intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, bo_gem->name = name; bo_gem->refcount = 1; bo_gem->validate_index = -1; + bo_gem->gem_handle = open_arg.handle; DBG("bo_create_from_handle: %p %08x (%s)\n", &bo_gem->bo, handle, bo_gem->name); -- cgit v1.2.3 From 367b1e35dc1dbeda65709b0ab4f7983d0c7a6cc2 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 5 May 2008 10:45:30 -0700 Subject: Temporarily disable intel pixel ops on i915 for GEM Instead of attempting to fix these for GEM, just disable until GEM is working. --- src/mesa/drivers/dri/i915/Makefile | 9 ++++++--- src/mesa/drivers/dri/i915/i830_context.c | 2 +- src/mesa/drivers/dri/i915/i915_context.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index ed23410697..67f251a7fa 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -6,6 +6,11 @@ LIBNAME = i915_dri.so MINIGLX_SOURCES = server/intel_dri.c +PIXEL_SOURCES = \ + intel_pixel.c \ + intel_pixel_read.c \ + intel_pixel_draw.c + DRIVER_SOURCES = \ i830_context.c \ i830_metaops.c \ @@ -27,10 +32,7 @@ DRIVER_SOURCES = \ intel_tex_validate.c \ intel_tex_format.c \ intel_tex.c \ - intel_pixel.c \ intel_pixel_copy.c \ - intel_pixel_read.c \ - intel_pixel_draw.c \ intel_buffers.c \ intel_blit.c \ i915_tex.c \ @@ -68,6 +70,7 @@ DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \ include ../Makefile.template intel_decode.o: ../intel/intel_decode.c + intel_tex_layout.o: ../intel/intel_tex_layout.c symlinks: diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index 240c57c9ad..166a3bc8e2 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -52,7 +52,7 @@ static void i830InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); - intelInitPixelFuncs(functions); +// intelInitPixelFuncs(functions); i830InitStateFuncs(functions); i830InitTextureFuncs(functions); } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 0161959099..59da40229a 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -94,7 +94,7 @@ static void i915InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); - intelInitPixelFuncs(functions); +// intelInitPixelFuncs(functions); i915InitStateFunctions(functions); i915InitTextureFuncs(functions); i915InitFragProgFuncs(functions); -- cgit v1.2.3 From 87ccc03736166db9ef85f3eee3723b82f395d3cf Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 5 May 2008 10:46:27 -0700 Subject: Add intel_bufmgr_gem.c to i915 --- src/mesa/drivers/dri/i915/intel_bufmgr_gem.c | 1 + 1 file changed, 1 insertion(+) create mode 120000 src/mesa/drivers/dri/i915/intel_bufmgr_gem.c (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/intel_bufmgr_gem.c b/src/mesa/drivers/dri/i915/intel_bufmgr_gem.c new file mode 120000 index 0000000000..dee0daf9c0 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_bufmgr_gem.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_gem.c \ No newline at end of file -- cgit v1.2.3 From 1f810b85b1e9393c8e606d2f28250cbb19cf916b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 5 May 2008 13:40:50 -0700 Subject: Make intel_{batch,exec}_ioctl return an error code so we can recover better. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 25 +++++++++++++++---------- src/mesa/drivers/dri/intel/intel_ioctl.c | 18 ++++++++++-------- src/mesa/drivers/dri/intel/intel_ioctl.h | 16 ++++++++-------- 3 files changed, 33 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 683d06a552..a95abd9ec9 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -125,6 +125,7 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used, GLboolean allow_unlock) { struct intel_context *intel = batch->intel; + int ret = 0; dri_bo_unmap(batch->buf); @@ -142,18 +143,18 @@ do_flush_locked(struct intel_batchbuffer *batch, struct drm_i915_gem_execbuffer *execbuf; execbuf = dri_process_relocs(batch->buf); - intel_exec_ioctl(batch->intel, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock, - execbuf); + ret = intel_exec_ioctl(batch->intel, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock, + execbuf); } else { dri_process_relocs(batch->buf); - intel_batch_ioctl(batch->intel, - batch->buf->offset, - used, - batch->cliprect_mode != LOOP_CLIPRECTS, - allow_unlock); + ret = intel_batch_ioctl(batch->intel, + batch->buf->offset, + used, + batch->cliprect_mode != LOOP_CLIPRECTS, + allow_unlock); } } @@ -182,6 +183,10 @@ do_flush_locked(struct intel_batchbuffer *batch, intel->vtbl.debug_batch(intel); } + if (ret != 0) { + UNLOCK_HARDWARE(intel); + exit(1); + } intel->vtbl.new_batch(intel); } diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index 317ff2c440..591548ae85 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -106,7 +106,7 @@ intelWaitIrq(struct intel_context *intel, int seq) } -void +int intel_batch_ioctl(struct intel_context *intel, GLuint start_offset, GLuint used, @@ -115,7 +115,7 @@ intel_batch_ioctl(struct intel_context *intel, struct drm_i915_batchbuffer batch; if (intel->no_hw) - return; + return 0; assert(intel->locked); assert(used); @@ -144,12 +144,13 @@ intel_batch_ioctl(struct intel_context *intel, if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch, sizeof(batch))) { fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); + return -errno; } + + return 0; } -void +int intel_exec_ioctl(struct intel_context *intel, GLuint used, GLboolean ignore_cliprects, GLboolean allow_unlock, @@ -161,7 +162,7 @@ intel_exec_ioctl(struct intel_context *intel, assert(used); if (intel->no_hw) - return; + return 0; execbuf->batch_start_offset = 0; execbuf->batch_len = used; @@ -177,7 +178,8 @@ intel_exec_ioctl(struct intel_context *intel, if (ret != 0) { fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); + return -errno; } + + return 0; } diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.h b/src/mesa/drivers/dri/intel/intel_ioctl.h index 52b0ab6102..526e38358c 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.h +++ b/src/mesa/drivers/dri/intel/intel_ioctl.h @@ -33,14 +33,14 @@ void intelWaitIrq( struct intel_context *intel, int seq ); int intelEmitIrqLocked( struct intel_context *intel ); -void intel_batch_ioctl( struct intel_context *intel, - GLuint start_offset, - GLuint used, - GLboolean ignore_cliprects, - GLboolean allow_unlock ); -void intel_exec_ioctl(struct intel_context *intel, +int intel_batch_ioctl(struct intel_context *intel, + GLuint start_offset, GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf); + GLboolean ignore_cliprects, + GLboolean allow_unlock); +int intel_exec_ioctl(struct intel_context *intel, + GLuint used, + GLboolean ignore_cliprects, GLboolean allow_unlock, + struct drm_i915_gem_execbuffer *execbuf); #endif -- cgit v1.2.3 From 5290ec4756eb33ec27e06bb68d64c33472276ac3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 5 May 2008 13:45:03 -0700 Subject: Initialize bufmgr_gem->validate_array[i].offset. This is just cosmetic, to produce less scary values when the ioctl fails and doesn't return values there. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 37e75081ec..0b136b19da 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -228,6 +228,7 @@ intel_add_validate_buffer(dri_bo *bo) bufmgr_gem->validate_array[index].relocation_count = bo_gem->reloc_count; bufmgr_gem->validate_array[index].relocs_ptr = (uintptr_t)bo_gem->relocs; bufmgr_gem->validate_array[index].alignment = 0; + bufmgr_gem->validate_array[index].buffer_offset = 0; bufmgr_gem->validate_bo[index] = bo; dri_bo_reference(bo); bufmgr_gem->validate_count++; -- cgit v1.2.3 From be59d52ca0c0a5b93963297d596972fccb792b69 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 5 May 2008 14:15:40 -0700 Subject: Print GEM handles instead of BO pointers in debugging. small integers are much prettier, and let me correlate to DRM debug output. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 33 +++++++++++++++------------ 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 0b136b19da..fd161cbeb8 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -171,7 +171,7 @@ static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem) dri_bo_gem *bo_gem = (dri_bo_gem *)bo; if (bo_gem->relocs == NULL) { - DBG("%2d: %s\n", i, bo_gem->name); + DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, bo_gem->name); continue; } @@ -179,9 +179,9 @@ static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem) dri_bo *target_bo = bo_gem->reloc_target_bo[j]; dri_bo_gem *target_gem = (dri_bo_gem *)target_bo; - DBG("%2d: %s@0x%08llx -> %s@0x%08lx + 0x%08x\n", + DBG("%2d: %d (%s)@0x%08llx -> %s@0x%08lx + 0x%08x\n", i, - bo_gem->name, bo_gem->relocs[j].offset, + bo_gem->gem_handle, bo_gem->name, bo_gem->relocs[j].offset, target_gem->name, target_bo->offset, bo_gem->relocs[j].delta); } @@ -328,7 +328,8 @@ dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, bo_gem->refcount = 1; bo_gem->validate_index = -1; - DBG("bo_create: %p (%s) %ldb\n", &bo_gem->bo, bo_gem->name, size); + DBG("bo_create: buf %d (%s) %ldb\n", + bo_gem->gem_handle, bo_gem->name, size); return &bo_gem->bo; } @@ -382,8 +383,7 @@ intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, bo_gem->validate_index = -1; bo_gem->gem_handle = open_arg.handle; - DBG("bo_create_from_handle: %p %08x (%s)\n", - &bo_gem->bo, handle, bo_gem->name); + DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); return &bo_gem->bo; } @@ -444,12 +444,14 @@ dri_gem_bo_unreference(dri_bo *bo) unref.handle = bo_gem->gem_handle; ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_UNREFERENCE, &unref); if (ret != 0) { - fprintf(stderr, "DRM_IOCTL_GEM_UNREFERENCE failed (%s): %s\n", - bo_gem->name, strerror(-ret)); + fprintf(stderr, + "DRM_IOCTL_GEM_UNREFERENCE %d failed (%s): %s\n", + bo_gem->gem_handle, bo_gem->name, strerror(-ret)); } } - DBG("bo_unreference final: %p (%s)\n", &bo_gem->bo, bo_gem->name); + DBG("bo_unreference final: %d (%s)\n", + bo_gem->gem_handle, bo_gem->name); free(bo); return; @@ -473,7 +475,7 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) assert(bo->virtual == NULL); - DBG("bo_map: %p (%s)\n", &bo_gem->bo, bo_gem->name); + DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); if (bo_gem->virtual == NULL) { struct drm_gem_mmap mmap_arg; @@ -484,8 +486,9 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) mmap_arg.size = bo->size; ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg); if (ret != 0) { - fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", - __FILE__, __LINE__, bo_gem->name, strerror(-ret)); + fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, bo_gem->name, strerror(-ret)); } bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; } @@ -514,7 +517,7 @@ dri_gem_bo_unmap(dri_bo *bo) assert(bo->virtual != NULL); - DBG("bo_unmap: %p (%s)\n", &bo_gem->bo, bo_gem->name); + DBG("bo_unmap: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); munmap(bo_gem->virtual, bo->size); bo_gem->virtual = NULL; @@ -653,8 +656,8 @@ intel_update_buffer_offsets (dri_bufmgr_gem *bufmgr_gem) /* Update the buffer offset */ if (bufmgr_gem->validate_array[i].buffer_offset != bo->offset) { - DBG("BO %s migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->name, bo->offset, + DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", + bo_gem->gem_handle, bo_gem->name, bo->offset, bufmgr_gem->validate_array[i].buffer_offset); bo->offset = bufmgr_gem->validate_array[i].buffer_offset; } -- cgit v1.2.3 From 01d1a292bf53ab949cf2075f18986b58fa468a61 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 5 May 2008 14:20:18 -0700 Subject: GEM: Set validate index to keep the same buffer from being duped on the list. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index fd161cbeb8..ca0a92cfe8 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -223,6 +223,7 @@ intel_add_validate_buffer(dri_bo *bo) } index = bufmgr_gem->validate_count; + bo_gem->validate_index = index; /* Fill in array entry */ bufmgr_gem->validate_array[index].buffer_handle = bo_gem->gem_handle; bufmgr_gem->validate_array[index].relocation_count = bo_gem->reloc_count; -- cgit v1.2.3 From 1decab06d15f0dead0a544dbed2f10041caac844 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 5 May 2008 15:44:49 -0700 Subject: GEM: Include target buffer handle in relocation debug. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index ca0a92cfe8..6f0ecfbdb8 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -179,10 +179,10 @@ static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem) dri_bo *target_bo = bo_gem->reloc_target_bo[j]; dri_bo_gem *target_gem = (dri_bo_gem *)target_bo; - DBG("%2d: %d (%s)@0x%08llx -> %s@0x%08lx + 0x%08x\n", + DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n", i, bo_gem->gem_handle, bo_gem->name, bo_gem->relocs[j].offset, - target_gem->name, target_bo->offset, + target_gem->gem_handle, target_gem->name, target_bo->offset, bo_gem->relocs[j].delta); } } -- cgit v1.2.3 From e9a2a67745d46509928263f0556f5c0a4211b94f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 5 May 2008 15:45:15 -0700 Subject: GEM: Allocate the right number of relocs, avoiding heap smashing. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 6f0ecfbdb8..e4e8481b96 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -247,7 +247,7 @@ intel_setup_reloc_list(dri_bo *bo) bo_gem->relocs = calloc(bufmgr_gem->max_relocs, sizeof(struct drm_i915_gem_relocation_entry)); - bo_gem->reloc_target_bo = calloc(1, sizeof(dri_bo *)); + bo_gem->reloc_target_bo = calloc(bufmgr_gem->max_relocs, sizeof(dri_bo *)); return 0; } -- cgit v1.2.3 From df4b49c2cedde60c02f869977ee426f280b2985b Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 5 May 2008 22:08:05 -0700 Subject: Dump buffer tiled status from intelPrintSAREA --- src/mesa/drivers/dri/intel/intel_screen.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 7e0713c4f4..356e50e726 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -221,16 +221,16 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea) sarea->height); fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle); + (unsigned) sarea->front_handle, sarea->front_tiled); fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", + "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", + (unsigned) sarea->back_handle, sarea->back_tiled); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x tiled: %d\n", sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle); + (unsigned) sarea->depth_handle, sarea->depth_tiled); fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); } -- cgit v1.2.3 From 537bbe6dec780f6f85838fe7e6036579c509f8a6 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 6 May 2008 10:51:08 -0700 Subject: [intel-GEM] Add tiling support to swrast. Accessing tiled surfaces without using the fence registers requires that software deal with the address swizzling itself. --- src/mesa/drivers/dri/intel/intel_context.c | 3 + src/mesa/drivers/dri/intel/intel_fbo.c | 16 +- src/mesa/drivers/dri/intel/intel_fbo.h | 3 +- src/mesa/drivers/dri/intel/intel_screen.c | 18 +- src/mesa/drivers/dri/intel/intel_screen.h | 2 + src/mesa/drivers/dri/intel/intel_span.c | 327 +++++++++++++++++++++++++++-- src/mesa/drivers/dri/intel/intel_span.h | 6 +- 7 files changed, 350 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 4a1e1a9ac0..d258e669c0 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -498,6 +498,9 @@ intel_init_bufmgr(struct intel_context *intel) intel); } + /* XXX bufmgr should be per-screen, not per-context */ + intelScreen->ttm = intel->ttm; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index b3f6610546..bc0b579429 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -295,7 +295,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Height = height; /* This sets the Get/PutRow/Value functions */ - intel_set_span_functions(&irb->Base); + /* XXX can we choose a different tile here? */ + intel_set_span_functions(&irb->Base, INTEL_TILE_NONE); return GL_TRUE; } @@ -375,7 +376,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, * not a user-created renderbuffer. */ struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat) +intel_create_renderbuffer(GLenum intFormat, int tiling) { GET_CURRENT_CONTEXT(ctx); @@ -442,12 +443,14 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.InternalFormat = intFormat; + irb->tiling = tiling; + /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_alloc_window_storage; irb->Base.GetPointer = intel_get_pointer; /* This sets the Get/PutRow/Value functions */ - intel_set_span_functions(&irb->Base); + intel_set_span_functions(&irb->Base, tiling); return irb; } @@ -519,7 +522,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx, static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, - struct gl_texture_image *texImage) + struct gl_texture_image *texImage) { if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; @@ -558,7 +561,7 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; - intel_set_span_functions(&irb->Base); + intel_set_span_functions(&irb->Base, irb->tiling); irb->RenderToTexture = GL_TRUE; @@ -586,6 +589,9 @@ intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) _mesa_init_renderbuffer(&irb->Base, name); irb->Base.ClassID = INTEL_RB_CLASS; + /* XXX can we fix this? */ + irb->tiling = INTEL_TILE_NONE; + if (!intel_update_wrapper(ctx, irb, texImage)) { _mesa_free(irb); return NULL; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index c90c84b48c..9e085a1992 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -72,6 +72,7 @@ struct intel_renderbuffer struct intel_region *region; void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ + int tiling; GLboolean RenderToTexture; /* RTT? */ GLuint PairedDepth; /**< only used if this is a depth renderbuffer */ @@ -90,7 +91,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb, struct intel_region *region); extern struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat); +intel_create_renderbuffer(GLenum intFormat, int tiling); extern void intel_fbo_init(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 356e50e726..a243324a39 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -524,20 +524,23 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, /* setup the hardware-based renderbuffers */ { - intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); + intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat, + screen->ttm ? screen->front.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, &intel_fb->color_rb[0]->Base); } if (mesaVis->doubleBufferMode) { - intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); + intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat, + screen->ttm ? screen->back.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); if (screen->third.handle) { struct gl_renderbuffer *tmp_rb = NULL; - intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat); + intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat, + screen->ttm ? screen->third.tiled : INTEL_TILE_NONE); _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base); } } @@ -546,7 +549,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, if (mesaVis->stencilBits == 8) { /* combined depth/stencil buffer */ struct intel_renderbuffer *depthStencilRb - = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT); + = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, + screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); /* note: bind RB to two attachment points */ _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthStencilRb->Base); @@ -554,7 +558,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, &depthStencilRb->Base); } else { struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT24); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT24, + screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } @@ -562,7 +567,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT16, + screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index e62b2d7c89..9a73b13951 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -74,6 +74,8 @@ typedef struct int irq_active; int allow_batchbuffer; + int ttm; + /** * Configuration cache with default values for all contexts */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index df4f5927a0..149b581d88 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -39,6 +39,111 @@ #include "swrast/swrast.h" +/* + * Deal with tiled surfaces + */ + +#if 0 +/* These are pre-965 tile swizzling functions -- power of two widths */ +static uintptr_t x_tile_swizzle_pow2 (uintptr_t addr, int n) +{ + uintptr_t a = addr; + uintptr_t base_mask = (((~0) << (n + 4)) | 0xff); + uintptr_t x_mask = ((~0) << 12) & ~base_mask; + + a = ((a & base_mask) | + ((a >> (n-8)) & 0x7) | + ((a << 3) & x_mask)); + _mesa_printf ("x_swizzle %08x (base %x yrow %x tile#x %x xsword %x byte %x) %08x\n", + addr, + addr >> (n + 4), + (addr >> (n + 1)) & 0x7, + (addr >> 9) & ((1 << (n-8)) - 1), + (addr >> 5) & 0xf, + (addr & 0x1f), + a); + return a; +} + +static uintptr_t y_tile_swizzle_pow2 (uintptr_t addr, int n) +{ + uintptr_t a = (uintptr_t) addr; + uintptr_t base_mask = (((~0) << (n + 6)) | 0xf); + uintptr_t x_mask = ((~0) << 9) & ~base_mask; + + a = ((a & base_mask) | + ((a >> (n-3)) & 0x1f) | + ((a << 5) & x_mask)); + _mesa_printf ("y_swizzle %08x (base %x yrow %x tile#x %x xoword %x byte %x) %08x\n", + addr, + addr >> (n + 6), + (addr >> (n + 1)) & 0x01f, + (addr >> 7) & ((1 << (n-6)) - 1), + (addr >> 4) & 0x7, + (addr & 0xf), + a); + return a; +} +#endif + +static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel, + int x, int y) +{ + GLubyte *buf = (GLubyte *) irb->pfMap; + int tile_stride; + int xbyte; + int x_tile_off, y_tile_off; + int x_tile_number, y_tile_number; + int tile_off, tile_base; + + tile_stride = (irb->pfPitch * irb->region->cpp) << 3; + + x += intel->drawX; + y += intel->drawY; + + xbyte = x * irb->region->cpp; + + x_tile_off = xbyte & 0x1ff; + y_tile_off = y & 7; + + x_tile_number = xbyte >> 9; + y_tile_number = y >> 3; + + tile_off = (y_tile_off << 9) + x_tile_off; + tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; + + return buf + tile_base + tile_off; +} + +static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel, + int x, int y) +{ + GLubyte *buf = (GLubyte *) irb->pfMap; + int tile_stride; + int xbyte; + int x_tile_off, y_tile_off; + int x_tile_number, y_tile_number; + int tile_off, tile_base; + + tile_stride = (irb->pfPitch * irb->region->cpp) << 3; + + x += intel->drawX; + y += intel->drawY; + + xbyte = x * irb->region->cpp; + + x_tile_off = xbyte & 0x7f; + y_tile_off = y & 0x1f; + + x_tile_number = xbyte >> 7; + y_tile_number = y >> 5; + + tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + (x_tile_off & 0xf); + tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; + + return buf + tile_base + tile_off; +} + /* break intelWriteRGBASpan_ARGB8888 */ @@ -55,7 +160,7 @@ + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\ GLuint p; \ assert(irb->pfMap);\ - (void) p; + (void) p; (void) buf; /* XXX FBO: this is identical to the macro in spantmp2.h except we get * the cliprect info from the context, not the driDrawable. @@ -69,12 +174,14 @@ int miny = intel->pClipRects[_nc].y1 - intel->drawY; \ int maxx = intel->pClipRects[_nc].x2 - intel->drawX; \ int maxy = intel->pClipRects[_nc].y2 - intel->drawY; - - - + +#if 0 + }} +#endif #define Y_FLIP(_y) ((_y) * yScale + yBias) +/* XXX with GEM, these need to tell the kernel */ #define HW_LOCK() #define HW_UNLOCK() @@ -99,6 +206,43 @@ #define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4) #include "spantmp2.h" +/* 16 bit RGB565 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) intel_XTile_##x##_RGB565 +#define TAG2(x,y) intel_XTile_##x##_RGB565##y +#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_RGB +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 + +#define TAG(x) intel_YTile_##x##_RGB565 +#define TAG2(x,y) intel_YTile_##x##_RGB565##y +#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" + +/* 32 bit ARGB888 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_XTile_##x##_ARGB8888 +#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y +#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_YTile_##x##_ARGB8888 +#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y +#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) +#include "spantmp2.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ @@ -107,7 +251,7 @@ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \ - (intel->drawY * pitch + intel->drawX) * irb->region->cpp; + (intel->drawY * pitch + intel->drawX) * irb->region->cpp; (void) buf; #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -126,6 +270,33 @@ #include "depthtmp.h" +/** + ** 16-bit x tile depthbuffer functions. + **/ +#define WRITE_DEPTH( _x, _y, d ) \ + (*((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) = d) + +#define READ_DEPTH( d, _x, _y ) \ + d = *((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) + + +#define TAG(x) intel_XTile_##x##_z16 +#include "depthtmp.h" + +/** + ** 16-bit y tile depthbuffer functions. + **/ +#define WRITE_DEPTH( _x, _y, d ) \ + (*((GLushort *)y_tile_swizzle (irb, intel, _x, _y)) = d) + +#define READ_DEPTH( d, _x, _y ) \ + (d = *((GLushort *)y_tile_swizzle (irb, intel, _x, _y))) + + +#define TAG(x) intel_YTile_##x##_z16 +#include "depthtmp.h" + + /** ** 24/8-bit interleaved depth/stencil functions ** Note: we're actually reading back combined depth+stencil values. @@ -148,6 +319,49 @@ #include "depthtmp.h" +/** + ** 24/8-bit x-tile interleaved depth/stencil functions + ** Note: we're actually reading back combined depth+stencil values. + ** The wrappers in main/depthstencil.c are used to extract the depth + ** and stencil values. + **/ +/* Change ZZZS -> SZZZ */ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = ((d) >> 8) | ((d) << 24); \ + *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)) = tmp; \ +} + +/* Change SZZZ -> ZZZS */ +#define READ_DEPTH( d, _x, _y ) { \ + GLuint tmp = *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)); \ + d = (tmp << 8) | (tmp >> 24); \ +} + +#define TAG(x) intel_XTile_##x##_z24_s8 +#include "depthtmp.h" + +/** + ** 24/8-bit y-tile interleaved depth/stencil functions + ** Note: we're actually reading back combined depth+stencil values. + ** The wrappers in main/depthstencil.c are used to extract the depth + ** and stencil values. + **/ +/* Change ZZZS -> SZZZ */ +#define WRITE_DEPTH( _x, _y, d ) { \ + GLuint tmp = ((d) >> 8) | ((d) << 24); \ + *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)) = tmp; \ +} + +/* Change SZZZ -> ZZZS */ +#define READ_DEPTH( d, _x, _y ) { \ + GLuint tmp = *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)); \ + d = (tmp << 8) | (tmp >> 24); \ +} + +#define TAG(x) intel_YTile_##x##_z24_s8 +#include "depthtmp.h" + + /** ** 8-bit stencil function (XXX FBO: This is obsolete) **/ @@ -164,6 +378,40 @@ #define TAG(x) intel##x##_z24_s8 #include "stenciltmp.h" +/** + ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL( _x, _y, d ) { \ + GLuint *a = (GLuint *) x_tile_swizzle (irb, intel, _x, _y); \ + GLuint tmp = *a; \ + tmp &= 0xffffff; \ + tmp |= ((d) << 24); \ + *a = tmp; \ +} + +#define READ_STENCIL( d, _x, _y ) \ + (d = *((GLuint*) x_tile_swizzle (irb, intel, _x, _y)) >> 24) + +#define TAG(x) intel_XTile_##x##_z24_s8 +#include "stenciltmp.h" + +/** + ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) + **/ +#define WRITE_STENCIL( _x, _y, d ) { \ + GLuint *a = (GLuint *) y_tile_swizzle (irb, intel, _x, _y); \ + GLuint tmp = *a; \ + tmp &= 0xffffff; \ + tmp |= ((d) << 24); \ + *a = tmp; \ +} + +#define READ_STENCIL( d, _x, _y ) \ + (d = *((GLuint*) y_tile_swizzle (irb, intel, _x, _y)) >> 24) + +#define TAG(x) intel_YTile_##x##_z24_s8 +#include "stenciltmp.h" + /** @@ -379,25 +627,80 @@ intelInitSpanFuncs(GLcontext * ctx) * These are used for the software fallbacks. */ void -intel_set_span_functions(struct gl_renderbuffer *rb) +intel_set_span_functions(struct gl_renderbuffer *rb, int tiling) { if (rb->_ActualFormat == GL_RGB5) { /* 565 RGB */ - intelInitPointers_RGB565(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitPointers_RGB565(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitPointers_RGB565(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitPointers_RGB565(rb); + break; + } } else if (rb->_ActualFormat == GL_RGBA8) { /* 8888 RGBA */ - intelInitPointers_ARGB8888(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; + } } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { - intelInitDepthPointers_z16(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitDepthPointers_z16(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitDepthPointers_z16(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitDepthPointers_z16(rb); + break; + } } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - intelInitDepthPointers_z24_s8(rb); + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitDepthPointers_z24_s8(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitDepthPointers_z24_s8(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitDepthPointers_z24_s8(rb); + break; + } } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { /* XXX FBO remove */ - intelInitStencilPointers_z24_s8(rb); + else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { + switch (tiling) { + case INTEL_TILE_NONE: + default: + intelInitStencilPointers_z24_s8(rb); + break; + case INTEL_TILE_X: + intel_XTile_InitStencilPointers_z24_s8(rb); + break; + case INTEL_TILE_Y: + intel_YTile_InitStencilPointers_z24_s8(rb); + break; + } } else { _mesa_problem(NULL, diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index 5201f6d6c6..c56e5e1611 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -33,6 +33,10 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); -extern void intel_set_span_functions(struct gl_renderbuffer *rb); +extern void intel_set_span_functions(struct gl_renderbuffer *rb, int tiling); + +#define INTEL_TILE_NONE 0 +#define INTEL_TILE_X 1 +#define INTEL_TILE_Y 2 #endif -- cgit v1.2.3 From a2ec8570aeb838700fa97b8c5ba6d9d383e5606e Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 6 May 2008 22:06:41 -0700 Subject: [intel-GEM] partial support for memory domains. Doesn't deal with local modifications yet (need new kernel set_domain ioctl for that to work). Also, guesses what domains are affected based on the read/write bits set in the flags. Works for 915, probably not so much for 965. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index e4e8481b96..69d90e19d8 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -123,6 +123,8 @@ typedef struct _dri_bo_gem { dri_bo **reloc_target_bo; /** Number of entries in relocs */ int reloc_count; + /** Memory domains for synchronization */ + uint32_t read_domains, write_domain; /** Mapped address for the buffer */ void *virtual; } dri_bo_gem; @@ -230,6 +232,8 @@ intel_add_validate_buffer(dri_bo *bo) bufmgr_gem->validate_array[index].relocs_ptr = (uintptr_t)bo_gem->relocs; bufmgr_gem->validate_array[index].alignment = 0; bufmgr_gem->validate_array[index].buffer_offset = 0; + bufmgr_gem->validate_array[index].read_domains = bo_gem->read_domains; + bufmgr_gem->validate_array[index].write_domain = bo_gem->write_domain; bufmgr_gem->validate_bo[index] = bo; dri_bo_reference(bo); bufmgr_gem->validate_count++; @@ -597,6 +601,18 @@ dri_gem_emit_reloc(dri_bo *bo, uint64_t flags, GLuint delta, bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; dri_bo_reference(target_bo); + /** XXX set memory domains, using existing TTM flags (which is wrong) */ + if (flags & DRM_BO_FLAG_WRITE) + { + /* assume this means the rendering buffer */ + target_bo_gem->read_domains |= DRM_GEM_DOMAIN_I915_RENDER; + target_bo_gem->write_domain = DRM_GEM_DOMAIN_I915_RENDER; + } + if (flags & DRM_BO_FLAG_READ) + { + /* assume this means the sampler buffer */ + target_bo_gem->read_domains |= DRM_GEM_DOMAIN_I915_SAMPLER; + } bo_gem->reloc_count++; return 0; } @@ -629,7 +645,10 @@ dri_gem_bo_process_reloc(dri_bo *bo) static void * dri_gem_process_reloc(dri_bo *batch_buf) { - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)batch_buf; + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *) batch_buf->bufmgr; + + bo_gem->read_domains |= DRM_GEM_DOMAIN_I915_COMMAND; /* Update indices and set up the validate list. */ dri_gem_bo_process_reloc(batch_buf); @@ -680,6 +699,9 @@ dri_gem_post_submit(dri_bo *batch_buf) dri_bo *bo = bufmgr_gem->validate_bo[i]; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + /* clear read/write domain bits */ + bo_gem->read_domains = 0; + bo_gem->write_domain = 0; /* Disconnect the buffer from the validate list */ bo_gem->validate_index = -1; dri_bo_unreference(bo); -- cgit v1.2.3 From 8b2a7f08bc446deef497f2a0d3b54d9b70bdaf9c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 7 May 2008 10:01:14 -0700 Subject: GEM: Don't emit an extra MI_FLUSH in the batch since GEM handles it. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 31 +++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index a95abd9ec9..bab8e645d4 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -195,7 +195,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, int line) { struct intel_context *intel = batch->intel; - GLuint used = batch->ptr - batch->map; + GLuint used; GLboolean was_locked = intel->locked; if (used == 0) @@ -204,21 +204,26 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (INTEL_DEBUG & DEBUG_BATCH) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, used); - /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a - * performance drain that we would like to avoid. - */ - if (used & 4) { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = 0; - ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END; - used += 12; + + /* Emit a flush if the bufmgr doesn't do it for us. */ + if (!intel->ttm) { + *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); + batch->ptr += 4; } - else { - ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END; - used += 8; + + /* Round batchbuffer usage to 2 DWORDs. */ + used = batch->ptr - batch->map; + if ((used & 4) == 0) { + *(GLuint *) (batch->ptr) = 0; /* noop */ + batch->ptr += 4; } + /* Mark the end of the buffer. */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ + batch->ptr += 4; + + used = batch->ptr - batch->map; + /* Workaround for recursive batchbuffer flushing: If the window is * moved, we can get into a case where we try to flush during a * flush. What happens is that when we try to grab the lock for -- cgit v1.2.3 From ab50ddaa9173ae108833db0edb209045788efc41 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 7 May 2008 13:51:29 -0700 Subject: GEM: Make dri_emit_reloc take GEM domain flags instead of TTM flags. The GEM flags are much more descriptive for what we need. Since this makes bufmgr_fake rather device-specific, move it to the intel common directory. We've wanted to do device-specific stuff to it before. --- src/mesa/drivers/dri/Makefile.template | 3 +- src/mesa/drivers/dri/common/dri_bufmgr.c | 8 +- src/mesa/drivers/dri/common/dri_bufmgr.h | 21 +- src/mesa/drivers/dri/common/dri_bufmgr_fake.c | 1174 ---------------------- src/mesa/drivers/dri/i915/Makefile | 1 + src/mesa/drivers/dri/i915/i830_vtbl.c | 6 +- src/mesa/drivers/dri/i915/i915_vtbl.c | 6 +- src/mesa/drivers/dri/i915/intel_bufmgr_fake.c | 1 + src/mesa/drivers/dri/i965/Makefile | 1 + src/mesa/drivers/dri/i965/brw_cc.c | 3 +- src/mesa/drivers/dri/i965/brw_clip_state.c | 3 +- src/mesa/drivers/dri/i965/brw_curbe.c | 3 +- src/mesa/drivers/dri/i965/brw_draw_upload.c | 9 +- src/mesa/drivers/dri/i965/brw_gs_state.c | 2 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 19 +- src/mesa/drivers/dri/i965/brw_sf_state.c | 4 +- src/mesa/drivers/dri/i965/brw_vs_state.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_state.c | 6 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 16 +- src/mesa/drivers/dri/i965/intel_bufmgr_fake.c | 1 + src/mesa/drivers/dri/intel/intel_batchbuffer.c | 6 +- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 9 +- src/mesa/drivers/dri/intel/intel_blit.c | 31 +- src/mesa/drivers/dri/intel/intel_bufmgr_fake.c | 1173 +++++++++++++++++++++ src/mesa/drivers/dri/intel/intel_bufmgr_fake.h | 50 + src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 30 +- src/mesa/drivers/dri/intel/intel_context.c | 1 + 28 files changed, 1331 insertions(+), 1260 deletions(-) delete mode 100644 src/mesa/drivers/dri/common/dri_bufmgr_fake.c create mode 120000 src/mesa/drivers/dri/i915/intel_bufmgr_fake.c create mode 120000 src/mesa/drivers/dri/i965/intel_bufmgr_fake.c create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_fake.c create mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_fake.h (limited to 'src') diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 53f9d80689..cb41662707 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -12,8 +12,7 @@ COMMON_SOURCES = \ ../common/drirenderbuffer.c COMMON_BM_SOURCES = \ - ../common/dri_bufmgr.c \ - ../common/dri_bufmgr_fake.c + ../common/dri_bufmgr.c ifeq ($(WINDOW_SYSTEM),dri) diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 69868b6665..5967d7dafb 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -121,10 +121,12 @@ dri_bufmgr_destroy(dri_bufmgr *bufmgr) } -int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_buf) +int dri_emit_reloc(dri_bo *reloc_buf, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta, uint32_t offset, dri_bo *target_buf) { - return reloc_buf->bufmgr->emit_reloc(reloc_buf, flags, delta, offset, target_buf); + return reloc_buf->bufmgr->emit_reloc(reloc_buf, read_domains, write_domain, + delta, offset, target_buf); } void *dri_process_relocs(dri_bo *batch_buf) diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index dffeb4c601..99cfb2cd05 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -135,8 +135,9 @@ struct _dri_bufmgr { * \param target Buffer whose offset should be written into the relocation * entry. */ - int (*emit_reloc)(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target); + int (*emit_reloc)(dri_bo *reloc_buf, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta, uint32_t offset, dri_bo *target); /** * Processes the relocations, either in userland or by converting the list @@ -174,22 +175,12 @@ void dri_bo_subdata(dri_bo *bo, unsigned long offset, void dri_bo_get_subdata(dri_bo *bo, unsigned long offset, unsigned long size, void *data); -void dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr); -dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, - unsigned long size, - unsigned int (*fence_emit)(void *private), - int (*fence_wait)(void *private, - unsigned int cookie), - void *driver_priv); void dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug); -void dri_bo_fake_disable_backing_store(dri_bo *bo, - void (*invalidate_cb)(dri_bo *bo, - void *ptr), - void *ptr); void dri_bufmgr_destroy(dri_bufmgr *bufmgr); -int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_buf); +int dri_emit_reloc(dri_bo *reloc_buf, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta, uint32_t offset, dri_bo *target_buf); void *dri_process_relocs(dri_bo *batch_buf); void dri_post_process_relocs(dri_bo *batch_buf); void dri_post_submit(dri_bo *batch_buf); diff --git a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c deleted file mode 100644 index fc52674839..0000000000 --- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c +++ /dev/null @@ -1,1174 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Originally a fake version of the buffer manager so that we can - * prototype the changes in a driver fairly quickly, has been fleshed - * out to a fully functional interim solution. - * - * Basically wraps the old style memory management in the new - * programming interface, but is more expressive and avoids many of - * the bugs in the old texture manager. - */ -#include "mtypes.h" -#include "dri_bufmgr.h" -#include "drm.h" - -#include "simple_list.h" -#include "mm.h" -#include "imports.h" - -#define DBG(...) do { \ - if (bufmgr_fake->bufmgr.debug) \ - _mesa_printf(__VA_ARGS__); \ -} while (0) - -/* Internal flags: - */ -#define BM_NO_BACKING_STORE 0x00000001 -#define BM_NO_FENCE_SUBDATA 0x00000002 -#define BM_PINNED 0x00000004 - -/* Wrapper around mm.c's mem_block, which understands that you must - * wait for fences to expire before memory can be freed. This is - * specific to our use of memcpy for uploads - an upload that was - * processed through the command queue wouldn't need to care about - * fences. - */ -#define MAX_RELOCS 4096 - -struct fake_buffer_reloc -{ - /** Buffer object that the relocation points at. */ - dri_bo *target_buf; - /** Offset of the relocation entry within reloc_buf. */ - GLuint offset; - /** Cached value of the offset when we last performed this relocation. */ - GLuint last_target_offset; - /** Value added to target_buf's offset to get the relocation entry. */ - GLuint delta; - /** Flags to validate the target buffer under. */ - uint64_t validate_flags; -}; - -struct block { - struct block *next, *prev; - struct mem_block *mem; /* BM_MEM_AGP */ - - /** - * Marks that the block is currently in the aperture and has yet to be - * fenced. - */ - unsigned on_hardware:1; - /** - * Marks that the block is currently fenced (being used by rendering) and - * can't be freed until @fence is passed. - */ - unsigned fenced:1; - - /** Fence cookie for the block. */ - unsigned fence; /* Split to read_fence, write_fence */ - - dri_bo *bo; - void *virtual; -}; - -typedef struct _bufmgr_fake { - dri_bufmgr bufmgr; - - unsigned long low_offset; - unsigned long size; - void *virtual; - - struct mem_block *heap; - struct block lru; /* only allocated, non-fence-pending blocks here */ - - unsigned buf_nr; /* for generating ids */ - - struct block on_hardware; /* after bmValidateBuffers */ - struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ - /* then to bufmgr->lru or free() */ - - unsigned int last_fence; - - unsigned fail:1; - unsigned need_fence:1; - GLboolean thrashing; - - /** - * Driver callback to emit a fence, returning the cookie. - * - * Currently, this also requires that a write flush be emitted before - * emitting the fence, but this should change. - */ - unsigned int (*fence_emit)(void *private); - /** Driver callback to wait for a fence cookie to have passed. */ - int (*fence_wait)(void *private, unsigned int fence_cookie); - /** Driver-supplied argument to driver callbacks */ - void *driver_priv; - - GLboolean debug; - - GLboolean performed_rendering; - - /* keep track of the current total size of objects we have relocs for */ - unsigned long current_total_size; -} dri_bufmgr_fake; - -typedef struct _dri_bo_fake { - dri_bo bo; - - unsigned id; /* debug only */ - const char *name; - - unsigned dirty:1; - unsigned size_accounted:1; /*this buffers size has been accounted against the aperture */ - unsigned card_dirty:1; /* has the card written to this buffer - we make need to copy it back */ - unsigned int refcount; - /* Flags may consist of any of the DRM_BO flags, plus - * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the first two - * driver private flags. - */ - uint64_t flags; - unsigned int alignment; - GLboolean is_static, validated; - unsigned int map_count; - - /* Flags for the buffer to be validated with in command submission */ - uint64_t validate_flags; - - /** relocation list */ - struct fake_buffer_reloc *relocs; - GLuint nr_relocs; - - struct block *block; - void *backing_store; - void (*invalidate_cb)(dri_bo *bo, void *ptr); - void *invalidate_ptr; -} dri_bo_fake; - -static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, - unsigned int fence_cookie); - -static int dri_fake_check_aperture_space(dri_bo *bo); - -#define MAXFENCE 0x7fffffff - -static GLboolean FENCE_LTE( unsigned a, unsigned b ) -{ - if (a == b) - return GL_TRUE; - - if (a < b && b - a < (1<<24)) - return GL_TRUE; - - if (a > b && MAXFENCE - a + b < (1<<24)) - return GL_TRUE; - - return GL_FALSE; -} - -static unsigned int -_fence_emit_internal(dri_bufmgr_fake *bufmgr_fake) -{ - bufmgr_fake->last_fence = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); - return bufmgr_fake->last_fence; -} - -static void -_fence_wait_internal(dri_bufmgr_fake *bufmgr_fake, unsigned int cookie) -{ - int ret; - - ret = bufmgr_fake->fence_wait(bufmgr_fake->driver_priv, cookie); - if (ret != 0) { - _mesa_printf("%s:%d: Error %d waiting for fence.\n", - __FILE__, __LINE__); - abort(); - } - clear_fenced(bufmgr_fake, cookie); -} - -static GLboolean -_fence_test(dri_bufmgr_fake *bufmgr_fake, unsigned fence) -{ - /* Slight problem with wrap-around: - */ - return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence); -} - -/** - * Allocate a memory manager block for the buffer. - */ -static GLboolean -alloc_block(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - dri_bufmgr_fake *bufmgr_fake= (dri_bufmgr_fake *)bo->bufmgr; - struct block *block = (struct block *)calloc(sizeof *block, 1); - unsigned int align_log2 = _mesa_ffs(bo_fake->alignment) - 1; - GLuint sz; - - if (!block) - return GL_FALSE; - - sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); - - block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0); - if (!block->mem) { - free(block); - return GL_FALSE; - } - - make_empty_list(block); - - /* Insert at head or at tail??? - */ - insert_at_tail(&bufmgr_fake->lru, block); - - block->virtual = bufmgr_fake->virtual + - block->mem->ofs - bufmgr_fake->low_offset; - block->bo = bo; - - bo_fake->block = block; - - return GL_TRUE; -} - -/* Release the card storage associated with buf: - */ -static void free_block(dri_bufmgr_fake *bufmgr_fake, struct block *block) -{ - dri_bo_fake *bo_fake; - DBG("free block %p %08x %d %d\n", block, block->mem->ofs, block->on_hardware, block->fenced); - - if (!block) - return; - - bo_fake = (dri_bo_fake *)block->bo; - if (!(bo_fake->flags & BM_NO_BACKING_STORE) && (bo_fake->card_dirty == 1)) { - memcpy(bo_fake->backing_store, block->virtual, block->bo->size); - bo_fake->card_dirty = 1; - bo_fake->dirty = 1; - } - - if (block->on_hardware) { - block->bo = NULL; - } - else if (block->fenced) { - block->bo = NULL; - } - else { - DBG(" - free immediately\n"); - remove_from_list(block); - - mmFreeMem(block->mem); - free(block); - } -} - -static void -alloc_backing_store(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - assert(!bo_fake->backing_store); - assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); - - bo_fake->backing_store = ALIGN_MALLOC(bo->size, 64); - - DBG("alloc_backing - buf %d %p %d\n", bo_fake->id, bo_fake->backing_store, bo->size); - assert(bo_fake->backing_store); -} - -static void -free_backing_store(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->backing_store) { - assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); - ALIGN_FREE(bo_fake->backing_store); - bo_fake->backing_store = NULL; - } -} - -static void -set_dirty(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL) - bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr); - - assert(!(bo_fake->flags & BM_PINNED)); - - DBG("set_dirty - buf %d\n", bo_fake->id); - bo_fake->dirty = 1; -} - -static GLboolean -evict_lru(dri_bufmgr_fake *bufmgr_fake, GLuint max_fence) -{ - struct block *block, *tmp; - - DBG("%s\n", __FUNCTION__); - - foreach_s(block, tmp, &bufmgr_fake->lru) { - dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; - - if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) - continue; - - if (block->fence && max_fence && !FENCE_LTE(block->fence, max_fence)) - return 0; - - set_dirty(&bo_fake->bo); - bo_fake->block = NULL; - - free_block(bufmgr_fake, block); - return GL_TRUE; - } - - return GL_FALSE; -} - -#define foreach_s_rev(ptr, t, list) \ - for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) - -static GLboolean -evict_mru(dri_bufmgr_fake *bufmgr_fake) -{ - struct block *block, *tmp; - - DBG("%s\n", __FUNCTION__); - - foreach_s_rev(block, tmp, &bufmgr_fake->lru) { - dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; - - if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) - continue; - - set_dirty(&bo_fake->bo); - bo_fake->block = NULL; - - free_block(bufmgr_fake, block); - return GL_TRUE; - } - - return GL_FALSE; -} - -/** - * Removes all objects from the fenced list older than the given fence. - */ -static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, - unsigned int fence_cookie) -{ - struct block *block, *tmp; - int ret = 0; - - foreach_s(block, tmp, &bufmgr_fake->fenced) { - assert(block->fenced); - - if (_fence_test(bufmgr_fake, block->fence)) { - - block->fenced = 0; - - if (!block->bo) { - DBG("delayed free: offset %x sz %x\n", - block->mem->ofs, block->mem->size); - remove_from_list(block); - mmFreeMem(block->mem); - free(block); - } - else { - DBG("return to lru: offset %x sz %x\n", - block->mem->ofs, block->mem->size); - move_to_tail(&bufmgr_fake->lru, block); - } - - ret = 1; - } - else { - /* Blocks are ordered by fence, so if one fails, all from - * here will fail also: - */ - DBG("fence not passed: offset %x sz %x %d %d \n", - block->mem->ofs, block->mem->size, block->fence, bufmgr_fake->last_fence); - break; - } - } - - DBG("%s: %d\n", __FUNCTION__, ret); - return ret; -} - -static void fence_blocks(dri_bufmgr_fake *bufmgr_fake, unsigned fence) -{ - struct block *block, *tmp; - - foreach_s (block, tmp, &bufmgr_fake->on_hardware) { - DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n", block, - block->mem->size, block->mem->ofs, block->bo, fence); - block->fence = fence; - - block->on_hardware = 0; - block->fenced = 1; - - /* Move to tail of pending list here - */ - move_to_tail(&bufmgr_fake->fenced, block); - } - - assert(is_empty_list(&bufmgr_fake->on_hardware)); -} - -static GLboolean evict_and_alloc_block(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - assert(bo_fake->block == NULL); - - /* Search for already free memory: - */ - if (alloc_block(bo)) - return GL_TRUE; - - /* If we're not thrashing, allow lru eviction to dig deeper into - * recently used textures. We'll probably be thrashing soon: - */ - if (!bufmgr_fake->thrashing) { - while (evict_lru(bufmgr_fake, 0)) - if (alloc_block(bo)) - return GL_TRUE; - } - - /* Keep thrashing counter alive? - */ - if (bufmgr_fake->thrashing) - bufmgr_fake->thrashing = 20; - - /* Wait on any already pending fences - here we are waiting for any - * freed memory that has been submitted to hardware and fenced to - * become available: - */ - while (!is_empty_list(&bufmgr_fake->fenced)) { - GLuint fence = bufmgr_fake->fenced.next->fence; - _fence_wait_internal(bufmgr_fake, fence); - - if (alloc_block(bo)) - return GL_TRUE; - } - - if (!is_empty_list(&bufmgr_fake->on_hardware)) { - while (!is_empty_list(&bufmgr_fake->fenced)) { - GLuint fence = bufmgr_fake->fenced.next->fence; - _fence_wait_internal(bufmgr_fake, fence); - } - - if (!bufmgr_fake->thrashing) { - DBG("thrashing\n"); - } - bufmgr_fake->thrashing = 20; - - if (alloc_block(bo)) - return GL_TRUE; - } - - while (evict_mru(bufmgr_fake)) - if (alloc_block(bo)) - return GL_TRUE; - - DBG("%s 0x%x bytes failed\n", __FUNCTION__, bo->size); - - return GL_FALSE; -} - -/*********************************************************************** - * Public functions - */ - -/** - * Wait for hardware idle by emitting a fence and waiting for it. - */ -static void -dri_bufmgr_fake_wait_idle(dri_bufmgr_fake *bufmgr_fake) -{ - unsigned int cookie; - - cookie = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); - _fence_wait_internal(bufmgr_fake, cookie); -} - -/** - * Wait for execution pending on a buffer - */ -static void -dri_bufmgr_fake_bo_wait_idle(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->block == NULL || !bo_fake->block->fenced) - return; - - _fence_wait_internal(bufmgr_fake, bo_fake->block->fence); -} - -/* Specifically ignore texture memory sharing. - * -- just evict everything - * -- and wait for idle - */ -void -dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - struct block *block, *tmp; - - bufmgr_fake->need_fence = 1; - bufmgr_fake->fail = 0; - - /* Wait for hardware idle. We don't know where acceleration has been - * happening, so we'll need to wait anyway before letting anything get - * put on the card again. - */ - dri_bufmgr_fake_wait_idle(bufmgr_fake); - - /* Check that we hadn't released the lock without having fenced the last - * set of buffers. - */ - assert(is_empty_list(&bufmgr_fake->fenced)); - assert(is_empty_list(&bufmgr_fake->on_hardware)); - - foreach_s(block, tmp, &bufmgr_fake->lru) { - assert(_fence_test(bufmgr_fake, block->fence)); - set_dirty(block->bo); - } -} - -static dri_bo * -dri_fake_bo_alloc(dri_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask) -{ - dri_bufmgr_fake *bufmgr_fake; - dri_bo_fake *bo_fake; - - bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - - assert(size != 0); - - bo_fake = calloc(1, sizeof(*bo_fake)); - if (!bo_fake) - return NULL; - - bo_fake->bo.size = size; - bo_fake->bo.offset = -1; - bo_fake->bo.virtual = NULL; - bo_fake->bo.bufmgr = bufmgr; - bo_fake->refcount = 1; - - /* Alignment must be a power of two */ - assert((alignment & (alignment - 1)) == 0); - if (alignment == 0) - alignment = 1; - bo_fake->alignment = alignment; - bo_fake->id = ++bufmgr_fake->buf_nr; - bo_fake->name = name; - bo_fake->flags = 0; - bo_fake->is_static = GL_FALSE; - - DBG("drm_bo_alloc: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - return &bo_fake->bo; -} - -static dri_bo * -dri_fake_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, - void *virtual, uint64_t location_mask) -{ - dri_bufmgr_fake *bufmgr_fake; - dri_bo_fake *bo_fake; - - bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - - assert(size != 0); - - bo_fake = calloc(1, sizeof(*bo_fake)); - if (!bo_fake) - return NULL; - - bo_fake->bo.size = size; - bo_fake->bo.offset = offset; - bo_fake->bo.virtual = virtual; - bo_fake->bo.bufmgr = bufmgr; - bo_fake->refcount = 1; - bo_fake->id = ++bufmgr_fake->buf_nr; - bo_fake->name = name; - bo_fake->flags = BM_PINNED | DRM_BO_FLAG_NO_MOVE; - bo_fake->is_static = GL_TRUE; - - DBG("drm_bo_alloc_static: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - return &bo_fake->bo; -} - -static void -dri_fake_bo_reference(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - bo_fake->refcount++; -} - -static void -dri_fake_bo_unreference(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i; - - if (!bo) - return; - - if (--bo_fake->refcount == 0) { - assert(bo_fake->map_count == 0); - /* No remaining references, so free it */ - if (bo_fake->block) - free_block(bufmgr_fake, bo_fake->block); - free_backing_store(bo); - - for (i = 0; i < bo_fake->nr_relocs; i++) - dri_bo_unreference(bo_fake->relocs[i].target_buf); - - DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id, bo_fake->name); - - free(bo_fake->relocs); - free(bo); - - return; - } -} - -/** - * Set the buffer as not requiring backing store, and instead get the callback - * invoked whenever it would be set dirty. - */ -void dri_bo_fake_disable_backing_store(dri_bo *bo, - void (*invalidate_cb)(dri_bo *bo, - void *ptr), - void *ptr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->backing_store) - free_backing_store(bo); - - bo_fake->flags |= BM_NO_BACKING_STORE; - - DBG("disable_backing_store set buf %d dirty\n", bo_fake->id); - bo_fake->dirty = 1; - bo_fake->invalidate_cb = invalidate_cb; - bo_fake->invalidate_ptr = ptr; - - /* Note that it is invalid right from the start. Also note - * invalidate_cb is called with the bufmgr locked, so cannot - * itself make bufmgr calls. - */ - if (invalidate_cb != NULL) - invalidate_cb(bo, ptr); -} - -/** - * Map a buffer into bo->virtual, allocating either card memory space (If - * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary. - */ -static int -dri_fake_bo_map(dri_bo *bo, GLboolean write_enable) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - /* Static buffers are always mapped. */ - if (bo_fake->is_static) - return 0; - - /* Allow recursive mapping. Mesa may recursively map buffers with - * nested display loops, and it is used internally in bufmgr_fake - * for relocation. - */ - if (bo_fake->map_count++ != 0) - return 0; - - { - DBG("drm_bo_map: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - if (bo->virtual != NULL) { - _mesa_printf("%s: already mapped\n", __FUNCTION__); - abort(); - } - else if (bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED)) { - - if (!bo_fake->block && !evict_and_alloc_block(bo)) { - DBG("%s: alloc failed\n", __FUNCTION__); - bufmgr_fake->fail = 1; - return 1; - } - else { - assert(bo_fake->block); - bo_fake->dirty = 0; - - if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) && - bo_fake->block->fenced) { - dri_bufmgr_fake_bo_wait_idle(bo); - } - - bo->virtual = bo_fake->block->virtual; - } - } - else { - if (write_enable) - set_dirty(bo); - - if (bo_fake->backing_store == 0) - alloc_backing_store(bo); - - bo->virtual = bo_fake->backing_store; - } - } - - return 0; -} - -static int -dri_fake_bo_unmap(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - /* Static buffers are always mapped. */ - if (bo_fake->is_static) - return 0; - - assert(bo_fake->map_count != 0); - if (--bo_fake->map_count != 0) - return 0; - - DBG("drm_bo_unmap: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - bo->virtual = NULL; - - return 0; -} - -static void -dri_fake_kick_all(dri_bufmgr_fake *bufmgr_fake) -{ - struct block *block, *tmp; - - bufmgr_fake->performed_rendering = GL_FALSE; - /* okay for ever BO that is on the HW kick it off. - seriously not afraid of the POLICE right now */ - foreach_s(block, tmp, &bufmgr_fake->on_hardware) { - dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; - - block->on_hardware = 0; - free_block(bufmgr_fake, block); - bo_fake->block = NULL; - bo_fake->validated = GL_FALSE; - if (!(bo_fake->flags & BM_NO_BACKING_STORE)) - bo_fake->dirty = 1; - } -} - -static int -dri_fake_bo_validate(dri_bo *bo, uint64_t flags) -{ - dri_bufmgr_fake *bufmgr_fake; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - /* XXX: Sanity-check whether we've already validated this one under - * different flags. See drmAddValidateItem(). - */ - bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - - DBG("drm_bo_validate: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - /* Sanity check: Buffers should be unmapped before being validated. - * This is not so much of a problem for bufmgr_fake, but TTM refuses, - * and the problem is harder to debug there. - */ - assert(bo_fake->map_count == 0); - - if (bo_fake->is_static) { - /* Add it to the needs-fence list */ - bufmgr_fake->need_fence = 1; - return 0; - } - - /* reset size accounted */ - bo_fake->size_accounted = 0; - - /* Allocate the card memory */ - if (!bo_fake->block && !evict_and_alloc_block(bo)) { - bufmgr_fake->fail = 1; - DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name); - return -1; - } - - assert(bo_fake->block); - assert(bo_fake->block->bo == &bo_fake->bo); - - bo->offset = bo_fake->block->mem->ofs; - - /* Upload the buffer contents if necessary */ - if (bo_fake->dirty) { - DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id, - bo_fake->name, bo->size, bo_fake->block->mem->ofs); - - assert(!(bo_fake->flags & - (BM_NO_BACKING_STORE|BM_PINNED))); - - /* Actually, should be able to just wait for a fence on the memory, - * which we would be tracking when we free it. Waiting for idle is - * a sufficiently large hammer for now. - */ - dri_bufmgr_fake_wait_idle(bufmgr_fake); - - /* we may never have mapped this BO so it might not have any backing - * store if this happens it should be rare, but 0 the card memory - * in any case */ - if (bo_fake->backing_store) - memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size); - else - memset(bo_fake->block->virtual, 0, bo->size); - - bo_fake->dirty = 0; - } - - bo_fake->block->fenced = 0; - bo_fake->block->on_hardware = 1; - move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block); - - bo_fake->validated = GL_TRUE; - bufmgr_fake->need_fence = 1; - - return 0; -} - -static void -dri_fake_fence_validated(dri_bufmgr *bufmgr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - unsigned int cookie; - - cookie = _fence_emit_internal(bufmgr_fake); - fence_blocks(bufmgr_fake, cookie); - - DBG("drm_fence_validated: 0x%08x cookie\n", cookie); -} - -static void -dri_fake_destroy(dri_bufmgr *bufmgr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - - mmDestroy(bufmgr_fake->heap); - free(bufmgr); -} - -static int -dri_fake_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_buf) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)reloc_buf->bufmgr; - struct fake_buffer_reloc *r; - dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf; - dri_bo_fake *target_fake = (dri_bo_fake *)target_buf; - int i; - - assert(reloc_buf); - assert(target_buf); - - assert(target_fake->is_static || target_fake->size_accounted); - - if (reloc_fake->relocs == NULL) { - reloc_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * - MAX_RELOCS); - } - - r = &reloc_fake->relocs[reloc_fake->nr_relocs++]; - - assert(reloc_fake->nr_relocs <= MAX_RELOCS); - - dri_bo_reference(target_buf); - - r->target_buf = target_buf; - r->offset = offset; - r->last_target_offset = target_buf->offset; - r->delta = delta; - r->validate_flags = flags; - - if (bufmgr_fake->debug) { - /* Check that a conflicting relocation hasn't already been emitted. */ - for (i = 0; i < reloc_fake->nr_relocs - 1; i++) { - struct fake_buffer_reloc *r2 = &reloc_fake->relocs[i]; - - assert(r->offset != r2->offset); - } - } - - return 0; -} - -/** - * Incorporates the validation flags associated with each relocation into - * the combined validation flags for the buffer on this batchbuffer submission. - */ -static void -dri_fake_calculate_validate_flags(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i; - - for (i = 0; i < bo_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bo_fake->relocs[i]; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - - /* Do the same for the tree of buffers we depend on */ - dri_fake_calculate_validate_flags(r->target_buf); - - if (target_fake->validate_flags == 0) { - target_fake->validate_flags = r->validate_flags; - } else { - /* Mask the memory location to the intersection of all the memory - * locations the buffer is being validated to. - */ - target_fake->validate_flags = - (target_fake->validate_flags & ~DRM_BO_MASK_MEM) | - (r->validate_flags & target_fake->validate_flags & - DRM_BO_MASK_MEM); - /* All the other flags just accumulate. */ - target_fake->validate_flags |= r->validate_flags & ~DRM_BO_MASK_MEM; - } - } -} - - -static int -dri_fake_reloc_and_validate_buffer(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i, ret; - - assert(bo_fake->map_count == 0); - - for (i = 0; i < bo_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bo_fake->relocs[i]; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - uint32_t reloc_data; - - /* Validate the target buffer if that hasn't been done. */ - if (!target_fake->validated) { - ret = dri_fake_reloc_and_validate_buffer(r->target_buf); - if (ret != 0) { - if (bo->virtual != NULL) - dri_bo_unmap(bo); - return ret; - } - } - - /* Calculate the value of the relocation entry. */ - if (r->target_buf->offset != r->last_target_offset) { - reloc_data = r->target_buf->offset + r->delta; - - if (bo->virtual == NULL) - dri_bo_map(bo, GL_TRUE); - - *(uint32_t *)(bo->virtual + r->offset) = reloc_data; - - r->last_target_offset = r->target_buf->offset; - } - } - - if (bo->virtual != NULL) - dri_bo_unmap(bo); - - if (bo_fake->validate_flags & DRM_BO_FLAG_WRITE) { - if (!(bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED))) { - if (bo_fake->backing_store == 0) - alloc_backing_store(bo); - - bo_fake->card_dirty = 1; - } - bufmgr_fake->performed_rendering = GL_TRUE; - } - - return dri_fake_bo_validate(bo, bo_fake->validate_flags); -} - -static void * -dri_fake_process_relocs(dri_bo *batch_buf) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; - dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; - int ret; - int retry_count = 0; - - bufmgr_fake->performed_rendering = GL_FALSE; - - dri_fake_calculate_validate_flags(batch_buf); - - batch_fake->validate_flags = DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ; - - /* we've ran out of RAM so blow the whole lot away and retry */ - restart: - ret = dri_fake_reloc_and_validate_buffer(batch_buf); - if (bufmgr_fake->fail == 1) { - if (retry_count == 0) { - retry_count++; - dri_fake_kick_all(bufmgr_fake); - bufmgr_fake->fail = 0; - goto restart; - } else /* dump out the memory here */ - mmDumpMemInfo(bufmgr_fake->heap); - } - - assert(ret == 0); - - bufmgr_fake->current_total_size = 0; - return NULL; -} - -static void -dri_bo_fake_post_submit(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i; - - for (i = 0; i < bo_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bo_fake->relocs[i]; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - - if (target_fake->validated) - dri_bo_fake_post_submit(r->target_buf); - - DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", - bo_fake->name, (uint32_t)bo->offset, r->offset, - target_fake->name, (uint32_t)r->target_buf->offset, r->delta); - } - - assert(bo_fake->map_count == 0); - bo_fake->validated = GL_FALSE; - bo_fake->validate_flags = 0; -} - - -static void -dri_fake_post_submit(dri_bo *batch_buf) -{ - dri_fake_fence_validated(batch_buf->bufmgr); - - dri_bo_fake_post_submit(batch_buf); -} - -static int -dri_fake_check_aperture_space(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - GLuint sz; - - sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); - - if (bo_fake->size_accounted || bo_fake->is_static) - return 0; - - if (bufmgr_fake->current_total_size + sz > bufmgr_fake->size) { - DBG("check_space: %s bo %d %d overflowed bufmgr size %d\n", bo_fake->name, bo_fake->id, sz, bufmgr_fake->size); - return -1; - } - - bufmgr_fake->current_total_size += sz; - bo_fake->size_accounted = 1; - DBG("drm_check_space: buf %d, %s %d %d\n", bo_fake->id, bo_fake->name, bo->size, bufmgr_fake->current_total_size); - return 0; -} - -dri_bufmgr * -dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, - unsigned long size, - unsigned int (*fence_emit)(void *private), - int (*fence_wait)(void *private, unsigned int cookie), - void *driver_priv) -{ - dri_bufmgr_fake *bufmgr_fake; - - bufmgr_fake = calloc(1, sizeof(*bufmgr_fake)); - - /* Initialize allocator */ - make_empty_list(&bufmgr_fake->fenced); - make_empty_list(&bufmgr_fake->on_hardware); - make_empty_list(&bufmgr_fake->lru); - - bufmgr_fake->low_offset = low_offset; - bufmgr_fake->virtual = low_virtual; - bufmgr_fake->size = size; - bufmgr_fake->heap = mmInit(low_offset, size); - - /* Hook in methods */ - bufmgr_fake->bufmgr.bo_alloc = dri_fake_bo_alloc; - bufmgr_fake->bufmgr.bo_alloc_static = dri_fake_bo_alloc_static; - bufmgr_fake->bufmgr.bo_reference = dri_fake_bo_reference; - bufmgr_fake->bufmgr.bo_unreference = dri_fake_bo_unreference; - bufmgr_fake->bufmgr.bo_map = dri_fake_bo_map; - bufmgr_fake->bufmgr.bo_unmap = dri_fake_bo_unmap; - bufmgr_fake->bufmgr.destroy = dri_fake_destroy; - bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; - bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; - bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit; - bufmgr_fake->bufmgr.check_aperture_space = dri_fake_check_aperture_space; - bufmgr_fake->bufmgr.debug = GL_FALSE; - - bufmgr_fake->fence_emit = fence_emit; - bufmgr_fake->fence_wait = fence_wait; - bufmgr_fake->driver_priv = driver_priv; - - return &bufmgr_fake->bufmgr; -} - diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 67f251a7fa..476814c4ec 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -54,6 +54,7 @@ DRIVER_SOURCES = \ intel_tris.c \ intel_fbo.c \ intel_depthstencil.c \ + intel_bufmgr_fake.c \ intel_bufmgr_gem.c C_SOURCES = \ diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index c5a85fe035..86bf468a7e 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -490,14 +490,14 @@ i830_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, state->draw_region->draw_offset); if (state->depth_region) { OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, state->depth_region->draw_offset); } @@ -524,7 +524,7 @@ i830_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_SAMPLER, 0, state->tex_offset[i] | TM0S0_USE_FENCE); } else if (state == &i830->meta) { diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 135bfaa265..de1ec5effc 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -377,14 +377,14 @@ i915_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, state->draw_region->draw_offset); if (state->depth_region) { OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, state->depth_region->draw_offset); } @@ -427,7 +427,7 @@ i915_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_SAMPLER, 0, state->tex_offset[i]); } else if (state == &i915->meta) { diff --git a/src/mesa/drivers/dri/i915/intel_bufmgr_fake.c b/src/mesa/drivers/dri/i915/intel_bufmgr_fake.c new file mode 120000 index 0000000000..9b840a8123 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_bufmgr_fake.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_fake.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index ca9b7da40f..001f63ba12 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -9,6 +9,7 @@ DRIVER_SOURCES = \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ + intel_bufmgr_fake.c \ intel_bufmgr_gem.c \ intel_context.c \ intel_decode.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 9d8984f05c..b9338db0f5 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -257,7 +257,8 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) /* Emit CC viewport relocation */ dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, + 0, 0, offsetof(struct brw_cc_unit_state, cc4), brw->cc.vp_bo); diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 7cb21f894e..26c322672c 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -120,7 +120,8 @@ clip_unit_create_from_key(struct brw_context *brw, /* Emit clip program relocation */ assert(brw->clip.prog_bo); dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, + 0, clip.thread0.grf_reg_count << 1, offsetof(struct brw_clip_unit_state, thread0), brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 5ff4e2964e..1b5e22f130 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -353,7 +353,8 @@ static void emit_constant_buffer(struct brw_context *brw) OUT_BATCH(0); } else { OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); - OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC(brw->curbe.curbe_bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, (sz - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index aa985d68b6..5222d2e450 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -469,7 +469,7 @@ void brw_emit_vertices( struct brw_context *brw, BRW_VB0_ACCESS_VERTEXDATA | (input->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(input->bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_VERTEX, 0, input->offset); OUT_BATCH(max_index); OUT_BATCH(0); /* Instance data step rate */ @@ -590,8 +590,11 @@ void brw_emit_indices(struct brw_context *brw, BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); - OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset); - OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC( bo, + DRM_GEM_DOMAIN_I915_VERTEX, 0, + offset); + OUT_RELOC( bo, + DRM_GEM_DOMAIN_I915_VERTEX, 0, offset + ib_size); OUT_BATCH( 0 ); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index f1f9e018f1..2bf86f5573 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -107,7 +107,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (key->prog_active) { /* Emit GS program relocation */ dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, gs.thread0.grf_reg_count << 1, offsetof(struct brw_gs_unit_state, thread0), brw->gs.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 26ec797b5f..24dfd2e24e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -88,7 +88,9 @@ static void upload_binding_table_pointers(struct brw_context *brw) OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->wm.bind_bo, + DRM_GEM_DOMAIN_I915_SAMPLER, 0, + 0); ADVANCE_BATCH(); } @@ -114,18 +116,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->vs.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); if (brw->gs.prog_active) - OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); + OUT_RELOC(brw->gs.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 1); else OUT_BATCH(0); if (!brw->metaops.active) - OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); + OUT_RELOC(brw->clip.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 1); else OUT_BATCH(0); - OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); - OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); - OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->sf.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); + OUT_RELOC(brw->wm.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; @@ -233,7 +235,8 @@ static void emit_depthbuffer(struct brw_context *brw) (region->tiled << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(region->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0); + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((region->pitch - 1) << 6) | ((region->height - 1) << 19)); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 24388b79a5..5cf3228486 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -254,14 +254,14 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* Emit SF program relocation */ dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, sf.thread0.grf_reg_count << 1, offsetof(struct brw_sf_unit_state, thread0), brw->sf.prog_bo); /* Emit SF viewport relocation */ dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), offsetof(struct brw_sf_unit_state, sf5), brw->sf.vp_bo); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 2a64f3df33..73f52d7428 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -116,7 +116,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) /* Emit VS program relocation */ dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, vs.thread0.grf_reg_count << 1, offsetof(struct brw_vs_unit_state, thread0), brw->vs.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index d40332e9ae..13f7f21800 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -306,7 +306,7 @@ static int upload_wm_samplers( struct brw_context *brw ) ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]); dri_emit_reloc(brw->wm.sampler_bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0, i * sizeof(struct brw_sampler_state) + offsetof(struct brw_sampler_state, ss2), diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index f4da0f279e..f79b58ba7a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -200,7 +200,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit WM program relocation */ dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, wm.thread0.grf_reg_count << 1, offsetof(struct brw_wm_unit_state, thread0), brw->wm.prog_bo); @@ -208,7 +208,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit scratch space relocation */ if (key->total_scratch != 0) { dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, 0, wm.thread2.per_thread_scratch_space, offsetof(struct brw_wm_unit_state, thread2), brw->wm.scratch_buffer); @@ -217,7 +217,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit sampler state relocation */ if (key->sampler_count != 0) { dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), offsetof(struct brw_wm_unit_state, wm4), brw->wm.sampler_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 0d91391964..73f4b2b4a3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -204,7 +204,7 @@ brw_create_texture_surface( struct brw_context *brw, /* Emit relocation to surface contents */ dri_emit_reloc(bo, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_GEM_DOMAIN_I915_SAMPLER, 0, 0, offsetof(struct brw_surface_state, ss1), key->bo); @@ -337,10 +337,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, &surf, sizeof(surf), NULL, NULL); if (region_bo != NULL) { + /* We might sample from it, and we might render to it, so flag + * them both. We might be able to figure out from other state + * a more restrictive relocation to emit. + */ dri_emit_reloc(brw->wm.surf_bo[unit], - DRM_BO_FLAG_MEM_TT | - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE, + DRM_GEM_DOMAIN_I915_RENDER | + DRM_GEM_DOMAIN_I915_SAMPLER, + DRM_GEM_DOMAIN_I915_RENDER, 0, offsetof(struct brw_surface_state, ss1), region_bo); @@ -388,9 +392,7 @@ brw_wm_get_binding_table(struct brw_context *brw) for (i = 0; i < BRW_WM_MAX_SURF; i++) { if (brw->wm.surf_bo[i] != NULL) { dri_emit_reloc(bind_bo, - DRM_BO_FLAG_MEM_TT | - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0, i * sizeof(GLuint), brw->wm.surf_bo[i]); diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_fake.c b/src/mesa/drivers/dri/i965/intel_bufmgr_fake.c new file mode 120000 index 0000000000..9b840a8123 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_fake.c @@ -0,0 +1 @@ +../intel/intel_bufmgr_fake.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index bab8e645d4..f22e6c0967 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -267,11 +267,13 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - GLuint flags, GLuint delta) + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) { int ret; - ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer); + ret = dri_emit_reloc(batch->buf, read_domains, write_domain, + delta, batch->ptr - batch->map, buffer); /* * Using the old buffer offset, write in what the right data would be, in case diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index feddfb46df..7268bd59da 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -79,7 +79,9 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *buffer, - GLuint flags, GLuint offset); + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to @@ -131,9 +133,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_RELOC(buf, cliprect_mode, delta) do { \ +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ - intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \ + intel_batchbuffer_emit_reloc(intel->batch, buf, \ + read_domains, write_domain, delta); \ } while (0) #define ADVANCE_BATCH() do { } while(0) diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index b7d36d8cd6..3d7f64e28b 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -148,10 +148,14 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, OUT_BATCH((box.y1 << 16) | box.x1); OUT_BATCH((box.y2 << 16) | box.x2); - OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0); + OUT_RELOC(dst->buffer, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + 0); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(src->buffer, + DRM_GEM_DOMAIN_I915_RENDER, 0, + 0); ADVANCE_BATCH(); } @@ -212,7 +216,9 @@ intelEmitFillBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_RELOC(dst_buffer, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + dst_offset); OUT_BATCH(color); ADVANCE_BATCH(); } @@ -332,11 +338,13 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(dst_buffer, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC(src_buffer, + DRM_GEM_DOMAIN_I915_RENDER, 0, src_offset); ADVANCE_BATCH(); } @@ -349,11 +357,13 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH(BR13 | dst_pitch); OUT_BATCH((0 << 16) | dst_x); OUT_BATCH((h << 16) | dst_x2); - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(dst_buffer, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, dst_offset + dst_y * dst_pitch); OUT_BATCH((0 << 16) | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + OUT_RELOC(src_buffer, + DRM_GEM_DOMAIN_I915_RENDER, 0, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } @@ -528,7 +538,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((b.y1 << 16) | b.x1); OUT_BATCH((b.y2 << 16) | b.x2); - OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + OUT_RELOC(write_buffer, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, irb_region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); @@ -600,7 +611,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset); + OUT_RELOC(dst_buffer, + DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c new file mode 100644 index 0000000000..5d01a471c5 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c @@ -0,0 +1,1173 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Originally a fake version of the buffer manager so that we can + * prototype the changes in a driver fairly quickly, has been fleshed + * out to a fully functional interim solution. + * + * Basically wraps the old style memory management in the new + * programming interface, but is more expressive and avoids many of + * the bugs in the old texture manager. + */ +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "intel_bufmgr_fake.h" +#include "drm.h" +#include "i915_drm.h" + +#include "simple_list.h" +#include "mm.h" +#include "imports.h" + +#define DBG(...) do { \ + if (bufmgr_fake->bufmgr.debug) \ + _mesa_printf(__VA_ARGS__); \ +} while (0) + +/* Internal flags: + */ +#define BM_NO_BACKING_STORE 0x00000001 +#define BM_NO_FENCE_SUBDATA 0x00000002 +#define BM_PINNED 0x00000004 + +/* Wrapper around mm.c's mem_block, which understands that you must + * wait for fences to expire before memory can be freed. This is + * specific to our use of memcpy for uploads - an upload that was + * processed through the command queue wouldn't need to care about + * fences. + */ +#define MAX_RELOCS 4096 + +struct fake_buffer_reloc +{ + /** Buffer object that the relocation points at. */ + dri_bo *target_buf; + /** Offset of the relocation entry within reloc_buf. */ + GLuint offset; + /** Cached value of the offset when we last performed this relocation. */ + GLuint last_target_offset; + /** Value added to target_buf's offset to get the relocation entry. */ + GLuint delta; + /** Cache domains the target buffer is read into. */ + uint32_t read_domains; + /** Cache domain the target buffer will have dirty cachelines in. */ + uint32_t write_domain; +}; + +struct block { + struct block *next, *prev; + struct mem_block *mem; /* BM_MEM_AGP */ + + /** + * Marks that the block is currently in the aperture and has yet to be + * fenced. + */ + unsigned on_hardware:1; + /** + * Marks that the block is currently fenced (being used by rendering) and + * can't be freed until @fence is passed. + */ + unsigned fenced:1; + + /** Fence cookie for the block. */ + unsigned fence; /* Split to read_fence, write_fence */ + + dri_bo *bo; + void *virtual; +}; + +typedef struct _bufmgr_fake { + dri_bufmgr bufmgr; + + unsigned long low_offset; + unsigned long size; + void *virtual; + + struct mem_block *heap; + struct block lru; /* only allocated, non-fence-pending blocks here */ + + unsigned buf_nr; /* for generating ids */ + + struct block on_hardware; /* after bmValidateBuffers */ + struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ + /* then to bufmgr->lru or free() */ + + unsigned int last_fence; + + unsigned fail:1; + unsigned need_fence:1; + GLboolean thrashing; + + /** + * Driver callback to emit a fence, returning the cookie. + * + * Currently, this also requires that a write flush be emitted before + * emitting the fence, but this should change. + */ + unsigned int (*fence_emit)(void *private); + /** Driver callback to wait for a fence cookie to have passed. */ + int (*fence_wait)(void *private, unsigned int fence_cookie); + /** Driver-supplied argument to driver callbacks */ + void *driver_priv; + + GLboolean debug; + + GLboolean performed_rendering; + + /* keep track of the current total size of objects we have relocs for */ + unsigned long current_total_size; +} dri_bufmgr_fake; + +typedef struct _dri_bo_fake { + dri_bo bo; + + unsigned id; /* debug only */ + const char *name; + + unsigned dirty:1; + unsigned size_accounted:1; /*this buffers size has been accounted against the aperture */ + unsigned card_dirty:1; /* has the card written to this buffer - we make need to copy it back */ + unsigned int refcount; + /* Flags may consist of any of the DRM_BO flags, plus + * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the first two + * driver private flags. + */ + uint64_t flags; + /** Cache domains the target buffer is read into. */ + uint32_t read_domains; + /** Cache domain the target buffer will have dirty cachelines in. */ + uint32_t write_domain; + + unsigned int alignment; + GLboolean is_static, validated; + unsigned int map_count; + + /** relocation list */ + struct fake_buffer_reloc *relocs; + GLuint nr_relocs; + + struct block *block; + void *backing_store; + void (*invalidate_cb)(dri_bo *bo, void *ptr); + void *invalidate_ptr; +} dri_bo_fake; + +static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, + unsigned int fence_cookie); + +static int dri_fake_check_aperture_space(dri_bo *bo); + +#define MAXFENCE 0x7fffffff + +static GLboolean FENCE_LTE( unsigned a, unsigned b ) +{ + if (a == b) + return GL_TRUE; + + if (a < b && b - a < (1<<24)) + return GL_TRUE; + + if (a > b && MAXFENCE - a + b < (1<<24)) + return GL_TRUE; + + return GL_FALSE; +} + +static unsigned int +_fence_emit_internal(dri_bufmgr_fake *bufmgr_fake) +{ + bufmgr_fake->last_fence = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); + return bufmgr_fake->last_fence; +} + +static void +_fence_wait_internal(dri_bufmgr_fake *bufmgr_fake, unsigned int cookie) +{ + int ret; + + ret = bufmgr_fake->fence_wait(bufmgr_fake->driver_priv, cookie); + if (ret != 0) { + _mesa_printf("%s:%d: Error %d waiting for fence.\n", + __FILE__, __LINE__); + abort(); + } + clear_fenced(bufmgr_fake, cookie); +} + +static GLboolean +_fence_test(dri_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + /* Slight problem with wrap-around: + */ + return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence); +} + +/** + * Allocate a memory manager block for the buffer. + */ +static GLboolean +alloc_block(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + dri_bufmgr_fake *bufmgr_fake= (dri_bufmgr_fake *)bo->bufmgr; + struct block *block = (struct block *)calloc(sizeof *block, 1); + unsigned int align_log2 = _mesa_ffs(bo_fake->alignment) - 1; + GLuint sz; + + if (!block) + return GL_FALSE; + + sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0); + if (!block->mem) { + free(block); + return GL_FALSE; + } + + make_empty_list(block); + + /* Insert at head or at tail??? + */ + insert_at_tail(&bufmgr_fake->lru, block); + + block->virtual = bufmgr_fake->virtual + + block->mem->ofs - bufmgr_fake->low_offset; + block->bo = bo; + + bo_fake->block = block; + + return GL_TRUE; +} + +/* Release the card storage associated with buf: + */ +static void free_block(dri_bufmgr_fake *bufmgr_fake, struct block *block) +{ + dri_bo_fake *bo_fake; + DBG("free block %p %08x %d %d\n", block, block->mem->ofs, block->on_hardware, block->fenced); + + if (!block) + return; + + bo_fake = (dri_bo_fake *)block->bo; + if (!(bo_fake->flags & BM_NO_BACKING_STORE) && (bo_fake->card_dirty == 1)) { + memcpy(bo_fake->backing_store, block->virtual, block->bo->size); + bo_fake->card_dirty = 1; + bo_fake->dirty = 1; + } + + if (block->on_hardware) { + block->bo = NULL; + } + else if (block->fenced) { + block->bo = NULL; + } + else { + DBG(" - free immediately\n"); + remove_from_list(block); + + mmFreeMem(block->mem); + free(block); + } +} + +static void +alloc_backing_store(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + assert(!bo_fake->backing_store); + assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); + + bo_fake->backing_store = ALIGN_MALLOC(bo->size, 64); + + DBG("alloc_backing - buf %d %p %d\n", bo_fake->id, bo_fake->backing_store, bo->size); + assert(bo_fake->backing_store); +} + +static void +free_backing_store(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->backing_store) { + assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); + ALIGN_FREE(bo_fake->backing_store); + bo_fake->backing_store = NULL; + } +} + +static void +set_dirty(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL) + bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr); + + assert(!(bo_fake->flags & BM_PINNED)); + + DBG("set_dirty - buf %d\n", bo_fake->id); + bo_fake->dirty = 1; +} + +static GLboolean +evict_lru(dri_bufmgr_fake *bufmgr_fake, GLuint max_fence) +{ + struct block *block, *tmp; + + DBG("%s\n", __FUNCTION__); + + foreach_s(block, tmp, &bufmgr_fake->lru) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + if (block->fence && max_fence && !FENCE_LTE(block->fence, max_fence)) + return 0; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block); + return GL_TRUE; + } + + return GL_FALSE; +} + +#define foreach_s_rev(ptr, t, list) \ + for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) + +static GLboolean +evict_mru(dri_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + DBG("%s\n", __FUNCTION__); + + foreach_s_rev(block, tmp, &bufmgr_fake->lru) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block); + return GL_TRUE; + } + + return GL_FALSE; +} + +/** + * Removes all objects from the fenced list older than the given fence. + */ +static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, + unsigned int fence_cookie) +{ + struct block *block, *tmp; + int ret = 0; + + foreach_s(block, tmp, &bufmgr_fake->fenced) { + assert(block->fenced); + + if (_fence_test(bufmgr_fake, block->fence)) { + + block->fenced = 0; + + if (!block->bo) { + DBG("delayed free: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + remove_from_list(block); + mmFreeMem(block->mem); + free(block); + } + else { + DBG("return to lru: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + move_to_tail(&bufmgr_fake->lru, block); + } + + ret = 1; + } + else { + /* Blocks are ordered by fence, so if one fails, all from + * here will fail also: + */ + DBG("fence not passed: offset %x sz %x %d %d \n", + block->mem->ofs, block->mem->size, block->fence, bufmgr_fake->last_fence); + break; + } + } + + DBG("%s: %d\n", __FUNCTION__, ret); + return ret; +} + +static void fence_blocks(dri_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + struct block *block, *tmp; + + foreach_s (block, tmp, &bufmgr_fake->on_hardware) { + DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n", block, + block->mem->size, block->mem->ofs, block->bo, fence); + block->fence = fence; + + block->on_hardware = 0; + block->fenced = 1; + + /* Move to tail of pending list here + */ + move_to_tail(&bufmgr_fake->fenced, block); + } + + assert(is_empty_list(&bufmgr_fake->on_hardware)); +} + +static GLboolean evict_and_alloc_block(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + assert(bo_fake->block == NULL); + + /* Search for already free memory: + */ + if (alloc_block(bo)) + return GL_TRUE; + + /* If we're not thrashing, allow lru eviction to dig deeper into + * recently used textures. We'll probably be thrashing soon: + */ + if (!bufmgr_fake->thrashing) { + while (evict_lru(bufmgr_fake, 0)) + if (alloc_block(bo)) + return GL_TRUE; + } + + /* Keep thrashing counter alive? + */ + if (bufmgr_fake->thrashing) + bufmgr_fake->thrashing = 20; + + /* Wait on any already pending fences - here we are waiting for any + * freed memory that has been submitted to hardware and fenced to + * become available: + */ + while (!is_empty_list(&bufmgr_fake->fenced)) { + GLuint fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + + if (alloc_block(bo)) + return GL_TRUE; + } + + if (!is_empty_list(&bufmgr_fake->on_hardware)) { + while (!is_empty_list(&bufmgr_fake->fenced)) { + GLuint fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + } + + if (!bufmgr_fake->thrashing) { + DBG("thrashing\n"); + } + bufmgr_fake->thrashing = 20; + + if (alloc_block(bo)) + return GL_TRUE; + } + + while (evict_mru(bufmgr_fake)) + if (alloc_block(bo)) + return GL_TRUE; + + DBG("%s 0x%x bytes failed\n", __FUNCTION__, bo->size); + + return GL_FALSE; +} + +/*********************************************************************** + * Public functions + */ + +/** + * Wait for hardware idle by emitting a fence and waiting for it. + */ +static void +dri_bufmgr_fake_wait_idle(dri_bufmgr_fake *bufmgr_fake) +{ + unsigned int cookie; + + cookie = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); + _fence_wait_internal(bufmgr_fake, cookie); +} + +/** + * Wait for execution pending on a buffer + */ +static void +dri_bufmgr_fake_bo_wait_idle(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->block == NULL || !bo_fake->block->fenced) + return; + + _fence_wait_internal(bufmgr_fake, bo_fake->block->fence); +} + +/* Specifically ignore texture memory sharing. + * -- just evict everything + * -- and wait for idle + */ +void +dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + struct block *block, *tmp; + + bufmgr_fake->need_fence = 1; + bufmgr_fake->fail = 0; + + /* Wait for hardware idle. We don't know where acceleration has been + * happening, so we'll need to wait anyway before letting anything get + * put on the card again. + */ + dri_bufmgr_fake_wait_idle(bufmgr_fake); + + /* Check that we hadn't released the lock without having fenced the last + * set of buffers. + */ + assert(is_empty_list(&bufmgr_fake->fenced)); + assert(is_empty_list(&bufmgr_fake->on_hardware)); + + foreach_s(block, tmp, &bufmgr_fake->lru) { + assert(_fence_test(bufmgr_fake, block->fence)); + set_dirty(block->bo); + } +} + +static dri_bo * +dri_fake_bo_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake; + + bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = -1; + bo_fake->bo.virtual = NULL; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + + /* Alignment must be a power of two */ + assert((alignment & (alignment - 1)) == 0); + if (alignment == 0) + alignment = 1; + bo_fake->alignment = alignment; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = 0; + bo_fake->is_static = GL_FALSE; + + DBG("drm_bo_alloc: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static dri_bo * +dri_fake_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, + void *virtual, uint64_t location_mask) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake; + + bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = offset; + bo_fake->bo.virtual = virtual; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = BM_PINNED | DRM_BO_FLAG_NO_MOVE; + bo_fake->is_static = GL_TRUE; + + DBG("drm_bo_alloc_static: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static void +dri_fake_bo_reference(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + bo_fake->refcount++; +} + +static void +dri_fake_bo_unreference(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + if (!bo) + return; + + if (--bo_fake->refcount == 0) { + assert(bo_fake->map_count == 0); + /* No remaining references, so free it */ + if (bo_fake->block) + free_block(bufmgr_fake, bo_fake->block); + free_backing_store(bo); + + for (i = 0; i < bo_fake->nr_relocs; i++) + dri_bo_unreference(bo_fake->relocs[i].target_buf); + + DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id, bo_fake->name); + + free(bo_fake->relocs); + free(bo); + + return; + } +} + +/** + * Set the buffer as not requiring backing store, and instead get the callback + * invoked whenever it would be set dirty. + */ +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->backing_store) + free_backing_store(bo); + + bo_fake->flags |= BM_NO_BACKING_STORE; + + DBG("disable_backing_store set buf %d dirty\n", bo_fake->id); + bo_fake->dirty = 1; + bo_fake->invalidate_cb = invalidate_cb; + bo_fake->invalidate_ptr = ptr; + + /* Note that it is invalid right from the start. Also note + * invalidate_cb is called with the bufmgr locked, so cannot + * itself make bufmgr calls. + */ + if (invalidate_cb != NULL) + invalidate_cb(bo, ptr); +} + +/** + * Map a buffer into bo->virtual, allocating either card memory space (If + * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary. + */ +static int +dri_fake_bo_map(dri_bo *bo, GLboolean write_enable) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) + return 0; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops, and it is used internally in bufmgr_fake + * for relocation. + */ + if (bo_fake->map_count++ != 0) + return 0; + + { + DBG("drm_bo_map: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + if (bo->virtual != NULL) { + _mesa_printf("%s: already mapped\n", __FUNCTION__); + abort(); + } + else if (bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED)) { + + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + DBG("%s: alloc failed\n", __FUNCTION__); + bufmgr_fake->fail = 1; + return 1; + } + else { + assert(bo_fake->block); + bo_fake->dirty = 0; + + if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) && + bo_fake->block->fenced) { + dri_bufmgr_fake_bo_wait_idle(bo); + } + + bo->virtual = bo_fake->block->virtual; + } + } + else { + if (write_enable) + set_dirty(bo); + + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + bo->virtual = bo_fake->backing_store; + } + } + + return 0; +} + +static int +dri_fake_bo_unmap(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) + return 0; + + assert(bo_fake->map_count != 0); + if (--bo_fake->map_count != 0) + return 0; + + DBG("drm_bo_unmap: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + bo->virtual = NULL; + + return 0; +} + +static void +dri_fake_kick_all(dri_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + bufmgr_fake->performed_rendering = GL_FALSE; + /* okay for ever BO that is on the HW kick it off. + seriously not afraid of the POLICE right now */ + foreach_s(block, tmp, &bufmgr_fake->on_hardware) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + block->on_hardware = 0; + free_block(bufmgr_fake, block); + bo_fake->block = NULL; + bo_fake->validated = GL_FALSE; + if (!(bo_fake->flags & BM_NO_BACKING_STORE)) + bo_fake->dirty = 1; + } +} + +static int +dri_fake_bo_validate(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* XXX: Sanity-check whether we've already validated this one under + * different flags. See drmAddValidateItem(). + */ + bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + + DBG("drm_bo_validate: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + /* Sanity check: Buffers should be unmapped before being validated. + * This is not so much of a problem for bufmgr_fake, but TTM refuses, + * and the problem is harder to debug there. + */ + assert(bo_fake->map_count == 0); + + if (bo_fake->is_static) { + /* Add it to the needs-fence list */ + bufmgr_fake->need_fence = 1; + return 0; + } + + /* reset size accounted */ + bo_fake->size_accounted = 0; + + /* Allocate the card memory */ + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + bufmgr_fake->fail = 1; + DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name); + return -1; + } + + assert(bo_fake->block); + assert(bo_fake->block->bo == &bo_fake->bo); + + bo->offset = bo_fake->block->mem->ofs; + + /* Upload the buffer contents if necessary */ + if (bo_fake->dirty) { + DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id, + bo_fake->name, bo->size, bo_fake->block->mem->ofs); + + assert(!(bo_fake->flags & + (BM_NO_BACKING_STORE|BM_PINNED))); + + /* Actually, should be able to just wait for a fence on the memory, + * which we would be tracking when we free it. Waiting for idle is + * a sufficiently large hammer for now. + */ + dri_bufmgr_fake_wait_idle(bufmgr_fake); + + /* we may never have mapped this BO so it might not have any backing + * store if this happens it should be rare, but 0 the card memory + * in any case */ + if (bo_fake->backing_store) + memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size); + else + memset(bo_fake->block->virtual, 0, bo->size); + + bo_fake->dirty = 0; + } + + bo_fake->block->fenced = 0; + bo_fake->block->on_hardware = 1; + move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block); + + bo_fake->validated = GL_TRUE; + bufmgr_fake->need_fence = 1; + + return 0; +} + +static void +dri_fake_fence_validated(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + unsigned int cookie; + + cookie = _fence_emit_internal(bufmgr_fake); + fence_blocks(bufmgr_fake, cookie); + + DBG("drm_fence_validated: 0x%08x cookie\n", cookie); +} + +static void +dri_fake_destroy(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + mmDestroy(bufmgr_fake->heap); + free(bufmgr); +} + +static int +dri_fake_emit_reloc(dri_bo *reloc_buf, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta, uint32_t offset, dri_bo *target_buf) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)reloc_buf->bufmgr; + struct fake_buffer_reloc *r; + dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf; + dri_bo_fake *target_fake = (dri_bo_fake *)target_buf; + int i; + + assert(reloc_buf); + assert(target_buf); + + assert(target_fake->is_static || target_fake->size_accounted); + + if (reloc_fake->relocs == NULL) { + reloc_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * + MAX_RELOCS); + } + + r = &reloc_fake->relocs[reloc_fake->nr_relocs++]; + + assert(reloc_fake->nr_relocs <= MAX_RELOCS); + + dri_bo_reference(target_buf); + + r->target_buf = target_buf; + r->offset = offset; + r->last_target_offset = target_buf->offset; + r->delta = delta; + r->read_domains = read_domains; + r->write_domain = write_domain; + + if (bufmgr_fake->debug) { + /* Check that a conflicting relocation hasn't already been emitted. */ + for (i = 0; i < reloc_fake->nr_relocs - 1; i++) { + struct fake_buffer_reloc *r2 = &reloc_fake->relocs[i]; + + assert(r->offset != r2->offset); + } + } + + return 0; +} + +/** + * Incorporates the validation flags associated with each relocation into + * the combined validation flags for the buffer on this batchbuffer submission. + */ +static void +dri_fake_calculate_domains(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + + /* Do the same for the tree of buffers we depend on */ + dri_fake_calculate_domains(r->target_buf); + + target_fake->read_domains |= r->read_domains; + if (target_fake->write_domain != 0) + target_fake->write_domain = r->write_domain; + } +} + + +static int +dri_fake_reloc_and_validate_buffer(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i, ret; + + assert(bo_fake->map_count == 0); + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + uint32_t reloc_data; + + /* Validate the target buffer if that hasn't been done. */ + if (!target_fake->validated) { + ret = dri_fake_reloc_and_validate_buffer(r->target_buf); + if (ret != 0) { + if (bo->virtual != NULL) + dri_bo_unmap(bo); + return ret; + } + } + + /* Calculate the value of the relocation entry. */ + if (r->target_buf->offset != r->last_target_offset) { + reloc_data = r->target_buf->offset + r->delta; + + if (bo->virtual == NULL) + dri_bo_map(bo, GL_TRUE); + + *(uint32_t *)(bo->virtual + r->offset) = reloc_data; + + r->last_target_offset = r->target_buf->offset; + } + } + + if (bo->virtual != NULL) + dri_bo_unmap(bo); + + if (bo_fake->write_domain != 0) { + if (!(bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED))) { + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + bo_fake->card_dirty = 1; + } + bufmgr_fake->performed_rendering = GL_TRUE; + } + + return dri_fake_bo_validate(bo); +} + +static void * +dri_fake_process_relocs(dri_bo *batch_buf) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; + int ret; + int retry_count = 0; + + bufmgr_fake->performed_rendering = GL_FALSE; + + dri_fake_calculate_domains(batch_buf); + + batch_fake->read_domains = DRM_GEM_DOMAIN_I915_COMMAND; + + /* we've ran out of RAM so blow the whole lot away and retry */ + restart: + ret = dri_fake_reloc_and_validate_buffer(batch_buf); + if (bufmgr_fake->fail == 1) { + if (retry_count == 0) { + retry_count++; + dri_fake_kick_all(bufmgr_fake); + bufmgr_fake->fail = 0; + goto restart; + } else /* dump out the memory here */ + mmDumpMemInfo(bufmgr_fake->heap); + } + + assert(ret == 0); + + bufmgr_fake->current_total_size = 0; + return NULL; +} + +static void +dri_bo_fake_post_submit(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + + if (target_fake->validated) + dri_bo_fake_post_submit(r->target_buf); + + DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", + bo_fake->name, (uint32_t)bo->offset, r->offset, + target_fake->name, (uint32_t)r->target_buf->offset, r->delta); + } + + assert(bo_fake->map_count == 0); + bo_fake->validated = GL_FALSE; + bo_fake->read_domains = 0; + bo_fake->write_domain = 0; +} + + +static void +dri_fake_post_submit(dri_bo *batch_buf) +{ + dri_fake_fence_validated(batch_buf->bufmgr); + + dri_bo_fake_post_submit(batch_buf); +} + +static int +dri_fake_check_aperture_space(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + GLuint sz; + + sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + if (bo_fake->size_accounted || bo_fake->is_static) + return 0; + + if (bufmgr_fake->current_total_size + sz > bufmgr_fake->size) { + DBG("check_space: %s bo %d %d overflowed bufmgr size %d\n", bo_fake->name, bo_fake->id, sz, bufmgr_fake->size); + return -1; + } + + bufmgr_fake->current_total_size += sz; + bo_fake->size_accounted = 1; + DBG("drm_check_space: buf %d, %s %d %d\n", bo_fake->id, bo_fake->name, bo->size, bufmgr_fake->current_total_size); + return 0; +} + +dri_bufmgr * +dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, + unsigned long size, + unsigned int (*fence_emit)(void *private), + int (*fence_wait)(void *private, unsigned int cookie), + void *driver_priv) +{ + dri_bufmgr_fake *bufmgr_fake; + + bufmgr_fake = calloc(1, sizeof(*bufmgr_fake)); + + /* Initialize allocator */ + make_empty_list(&bufmgr_fake->fenced); + make_empty_list(&bufmgr_fake->on_hardware); + make_empty_list(&bufmgr_fake->lru); + + bufmgr_fake->low_offset = low_offset; + bufmgr_fake->virtual = low_virtual; + bufmgr_fake->size = size; + bufmgr_fake->heap = mmInit(low_offset, size); + + /* Hook in methods */ + bufmgr_fake->bufmgr.bo_alloc = dri_fake_bo_alloc; + bufmgr_fake->bufmgr.bo_alloc_static = dri_fake_bo_alloc_static; + bufmgr_fake->bufmgr.bo_reference = dri_fake_bo_reference; + bufmgr_fake->bufmgr.bo_unreference = dri_fake_bo_unreference; + bufmgr_fake->bufmgr.bo_map = dri_fake_bo_map; + bufmgr_fake->bufmgr.bo_unmap = dri_fake_bo_unmap; + bufmgr_fake->bufmgr.destroy = dri_fake_destroy; + bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; + bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; + bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit; + bufmgr_fake->bufmgr.check_aperture_space = dri_fake_check_aperture_space; + bufmgr_fake->bufmgr.debug = GL_FALSE; + + bufmgr_fake->fence_emit = fence_emit; + bufmgr_fake->fence_wait = fence_wait; + bufmgr_fake->driver_priv = driver_priv; + + return &bufmgr_fake->bufmgr; +} + diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.h b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.h new file mode 100644 index 0000000000..bc7e59e61d --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström + * Keith Whitwell + * Eric Anholt + */ + +#ifndef _INTEL_BUFMGR_FAKE_H_ +#define _INTEL_BUFMGR_FAKE_H_ + +void dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr); +dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, + unsigned long size, + unsigned int (*fence_emit)(void *private), + int (*fence_wait)(void *private, + unsigned int cookie), + void *driver_priv); +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr); +#endif /* _INTEL_BUFMGR_FAKE_H_ */ + diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 69d90e19d8..5e16f9de0b 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -579,8 +579,8 @@ dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) * last known offset in target_bo. */ static int -dri_gem_emit_reloc(dri_bo *bo, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_bo) +dri_gem_emit_reloc(dri_bo *bo, uint32_t read_domains, uint32_t write_domain, + uint32_t delta, uint32_t offset, dri_bo *target_bo) { dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; @@ -601,18 +601,17 @@ dri_gem_emit_reloc(dri_bo *bo, uint64_t flags, GLuint delta, bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; dri_bo_reference(target_bo); - /** XXX set memory domains, using existing TTM flags (which is wrong) */ - if (flags & DRM_BO_FLAG_WRITE) - { - /* assume this means the rendering buffer */ - target_bo_gem->read_domains |= DRM_GEM_DOMAIN_I915_RENDER; - target_bo_gem->write_domain = DRM_GEM_DOMAIN_I915_RENDER; - } - if (flags & DRM_BO_FLAG_READ) - { - /* assume this means the sampler buffer */ - target_bo_gem->read_domains |= DRM_GEM_DOMAIN_I915_SAMPLER; - } + /* Just accumulate the read domains into the target buffer. We don't care + * enough about minimizing the flags associated with a buffer for a + * specific set of relocations being done against it. + */ + target_bo_gem->read_domains |= read_domains; + /* XXX: this is broken if we have more than one write domain. We + * would need to be computing the write domain on the buffer based on + * order of relocs in the batchbuffer. But we only have one write buffer. + */ + target_bo_gem->write_domain = write_domain; + bo_gem->reloc_count++; return 0; } @@ -699,9 +698,6 @@ dri_gem_post_submit(dri_bo *batch_buf) dri_bo *bo = bufmgr_gem->validate_bo[i]; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - /* clear read/write domain bits */ - bo_gem->read_domains = 0; - bo_gem->write_domain = 0; /* Disconnect the buffer from the validate list */ bo_gem->validate_index = -1; dri_bo_unreference(bo); diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index d258e669c0..ae9e53ce6e 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -59,6 +59,7 @@ #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" +#include "intel_bufmgr_fake.h" #include "intel_bufmgr_gem.h" #include "drirenderbuffer.h" -- cgit v1.2.3 From ead798eb103e4cfe801704bc15eb4fe8df078fa8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 7 May 2008 14:01:03 -0700 Subject: GEM: Remove already-disabled PIPE_CONTROL command. This existed to get the icache flushed. However, GEM handles this for us now for sure, and we had disabled it prematurely anyway. --- src/mesa/drivers/dri/i965/brw_misc_state.c | 34 ---------------------------- src/mesa/drivers/dri/i965/brw_state_upload.c | 1 - 2 files changed, 35 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 24dfd2e24e..c941e054a3 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -383,40 +383,6 @@ const struct brw_tracked_state brw_line_stipple = { }; - -/*********************************************************************** - * Misc constant state packets - */ - -static void upload_pipe_control(struct brw_context *brw) -{ - struct brw_pipe_control pc; - - return; - - memset(&pc, 0, sizeof(pc)); - - pc.header.opcode = CMD_PIPE_CONTROL; - pc.header.length = sizeof(pc)/4 - 2; - pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; - - pc.header.instruction_state_cache_flush_enable = 1; - - pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; - - BRW_BATCH_STRUCT(brw, &pc); -} - -const struct brw_tracked_state brw_pipe_control = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH, - .cache = 0 - }, - .emit = upload_pipe_control -}; - - /*********************************************************************** * Misc invarient state packets */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 3b2ccd48c3..b8dfcf5b03 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -80,7 +80,6 @@ const struct brw_tracked_state *atoms[] = */ &brw_invarient_state, &brw_state_base_address, - &brw_pipe_control, &brw_binding_table_pointers, &brw_blend_constant_color, -- cgit v1.2.3 From fda5687241f4ce5cab3bf2eac437b52d4b37dd10 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 8 May 2008 10:37:23 -0700 Subject: [intel] intel_batchbuffer_flush using uninit 'used' to check for buffer empty Make sure 'used' tracks the right value through the whole function. Also, use GLint for intel_batchbuffer_space in case we do bad things in the future. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 10 +++++++--- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index f22e6c0967..b626e90476 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -195,7 +195,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, int line) { struct intel_context *intel = batch->intel; - GLuint used; + GLuint used = batch->ptr - batch->map; GLboolean was_locked = intel->locked; if (used == 0) @@ -209,19 +209,20 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, if (!intel->ttm) { *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); batch->ptr += 4; + used = batch->ptr - batch->map; } /* Round batchbuffer usage to 2 DWORDs. */ - used = batch->ptr - batch->map; + if ((used & 4) == 0) { *(GLuint *) (batch->ptr) = 0; /* noop */ batch->ptr += 4; + used = batch->ptr - batch->map; } /* Mark the end of the buffer. */ *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ batch->ptr += 4; - used = batch->ptr - batch->map; /* Workaround for recursive batchbuffer flushing: If the window is @@ -272,6 +273,9 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, { int ret; + if (batch->ptr - batch->map > batch->buf->size) + _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); ret = dri_emit_reloc(batch->buf, read_domains, write_domain, delta, batch->ptr - batch->map, buffer); diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 7268bd59da..5e8b14b401 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -88,7 +88,7 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, * be passed as structs rather than dwords, but that's a little bit of * work... */ -static INLINE GLuint +static INLINE GLint intel_batchbuffer_space(struct intel_batchbuffer *batch) { return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); -- cgit v1.2.3 From 0cb006c1fdb75e1fe282120cc5455a4e8c59b1a7 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 8 May 2008 10:38:55 -0700 Subject: [intel-gem] move domains to relocations. add set_domain to bo_map. Fix the kernel API to place the read/write domain information in the relocation instead of the buffer. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 42 +++++++++++++-------------- 1 file changed, 20 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 5e16f9de0b..399a6a12e3 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -123,8 +123,6 @@ typedef struct _dri_bo_gem { dri_bo **reloc_target_bo; /** Number of entries in relocs */ int reloc_count; - /** Memory domains for synchronization */ - uint32_t read_domains, write_domain; /** Mapped address for the buffer */ void *virtual; } dri_bo_gem; @@ -232,8 +230,6 @@ intel_add_validate_buffer(dri_bo *bo) bufmgr_gem->validate_array[index].relocs_ptr = (uintptr_t)bo_gem->relocs; bufmgr_gem->validate_array[index].alignment = 0; bufmgr_gem->validate_array[index].buffer_offset = 0; - bufmgr_gem->validate_array[index].read_domains = bo_gem->read_domains; - bufmgr_gem->validate_array[index].write_domain = bo_gem->write_domain; bufmgr_gem->validate_bo[index] = bo; dri_bo_reference(bo); bufmgr_gem->validate_count++; @@ -468,6 +464,7 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) { dri_bufmgr_gem *bufmgr_gem; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_set_domain set_domain; int ret; bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; @@ -493,14 +490,23 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) if (ret != 0) { fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", __FILE__, __LINE__, - bo_gem->gem_handle, bo_gem->name, strerror(-ret)); + bo_gem->gem_handle, bo_gem->name, strerror(errno)); } bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; } - - /* XXX Synchronization with hardware */ - bo->virtual = bo_gem->virtual; + DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->virtual); + + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = DRM_GEM_DOMAIN_CPU; + set_domain.write_domain = write_enable ? DRM_GEM_DOMAIN_CPU : 0; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, + strerror (errno)); + } return 0; } @@ -593,25 +599,20 @@ dri_gem_emit_reloc(dri_bo *bo, uint32_t read_domains, uint32_t write_domain, /* Check overflow */ assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); + /* Check args */ + assert (offset <= bo->size - 4); + assert ((write_domain & (write_domain-1)) == 0); + bo_gem->relocs[bo_gem->reloc_count].offset = offset; bo_gem->relocs[bo_gem->reloc_count].delta = delta; bo_gem->relocs[bo_gem->reloc_count].target_handle = target_bo_gem->gem_handle; + bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; + bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; dri_bo_reference(target_bo); - /* Just accumulate the read domains into the target buffer. We don't care - * enough about minimizing the flags associated with a buffer for a - * specific set of relocations being done against it. - */ - target_bo_gem->read_domains |= read_domains; - /* XXX: this is broken if we have more than one write domain. We - * would need to be computing the write domain on the buffer based on - * order of relocs in the batchbuffer. But we only have one write buffer. - */ - target_bo_gem->write_domain = write_domain; - bo_gem->reloc_count++; return 0; } @@ -644,11 +645,8 @@ dri_gem_bo_process_reloc(dri_bo *bo) static void * dri_gem_process_reloc(dri_bo *batch_buf) { - dri_bo_gem *bo_gem = (dri_bo_gem *)batch_buf; dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *) batch_buf->bufmgr; - bo_gem->read_domains |= DRM_GEM_DOMAIN_I915_COMMAND; - /* Update indices and set up the validate list. */ dri_gem_bo_process_reloc(batch_buf); -- cgit v1.2.3 From 145523ba3acb95a9ff390430a9e0a3fa958cae1b Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sun, 11 May 2008 00:16:25 -0700 Subject: [intel] update GEM api. Add bo_subdata and bo_get_subdata driver hooks. Track DRM GEM name changes. Add driver hooks for bo_subdata and bo_get_subdata so that GEM can use pread and pwrite. --- src/mesa/drivers/dri/common/dri_bufmgr.c | 25 +++- src/mesa/drivers/dri/common/dri_bufmgr.h | 26 ++++- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 26 ++++- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 2 + src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 155 ++++++++++++++++--------- 5 files changed, 166 insertions(+), 68 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 5967d7dafb..19ea2a8f86 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -90,28 +90,41 @@ dri_bo_unmap(dri_bo *buf) return buf->bufmgr->bo_unmap(buf); } -void +int dri_bo_subdata(dri_bo *bo, unsigned long offset, unsigned long size, const void *data) { + int ret; + if (bo->bufmgr->bo_subdata) + return bo->bufmgr->bo_subdata(bo, offset, size, data); if (size == 0 || data == NULL) - return; + return 0; - dri_bo_map(bo, GL_TRUE); + ret = dri_bo_map(bo, GL_TRUE); + if (ret) + return ret; memcpy((unsigned char *)bo->virtual + offset, data, size); dri_bo_unmap(bo); + return 0; } -void +int dri_bo_get_subdata(dri_bo *bo, unsigned long offset, unsigned long size, void *data) { + int ret; + if (bo->bufmgr->bo_subdata) + return bo->bufmgr->bo_get_subdata(bo, offset, size, data); + if (size == 0 || data == NULL) - return; + return 0; - dri_bo_map(bo, GL_FALSE); + ret = dri_bo_map(bo, GL_FALSE); + if (ret) + return ret; memcpy(data, (unsigned char *)bo->virtual + offset, size); dri_bo_unmap(bo); + return 0; } void diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index 99cfb2cd05..29f9aea2b1 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -109,6 +109,24 @@ struct _dri_bufmgr { /** Reduces the refcount on the userspace mapping of the buffer object. */ int (*bo_unmap)(dri_bo *buf); + /** + * Write data into an object. + * + * This is an optional function, if missing, + * dri_bo will map/memcpy/unmap. + */ + int (*bo_subdata) (dri_bo *buf, unsigned long offset, + unsigned long size, const void *data); + + /** + * Read data from an object + * + * This is an optional function, if missing, + * dri_bo will map/memcpy/unmap. + */ + int (*bo_get_subdata) (dri_bo *bo, unsigned long offset, + unsigned long size, void *data); + /** * Tears down the buffer manager instance. */ @@ -170,10 +188,10 @@ void dri_bo_unreference(dri_bo *bo); int dri_bo_map(dri_bo *buf, GLboolean write_enable); int dri_bo_unmap(dri_bo *buf); -void dri_bo_subdata(dri_bo *bo, unsigned long offset, - unsigned long size, const void *data); -void dri_bo_get_subdata(dri_bo *bo, unsigned long offset, - unsigned long size, void *data); +int dri_bo_subdata(dri_bo *bo, unsigned long offset, + unsigned long size, const void *data); +int dri_bo_get_subdata(dri_bo *bo, unsigned long offset, + unsigned long size, void *data); void dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug); void dri_bufmgr_destroy(dri_bufmgr *bufmgr); diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index b626e90476..803ff5e90e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -78,11 +78,18 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buf = NULL; } + if (!batch->buffer && intel->ttm == GL_TRUE) + batch->buffer = malloc (intel->maxBatchSize); + batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", intel->maxBatchSize, 4096, DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); - dri_bo_map(batch->buf, GL_TRUE); - batch->map = batch->buf->virtual; + if (batch->buffer) + batch->map = batch->buffer; + else { + dri_bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; + } batch->size = intel->maxBatchSize; batch->ptr = batch->map; batch->dirty_state = ~0; @@ -107,9 +114,13 @@ intel_batchbuffer_alloc(struct intel_context *intel) void intel_batchbuffer_free(struct intel_batchbuffer *batch) { - if (batch->map) { - dri_bo_unmap(batch->buf); - batch->map = NULL; + if (batch->buffer) + free (batch->buffer); + else { + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; + } } dri_bo_unreference(batch->buf); batch->buf = NULL; @@ -127,7 +138,10 @@ do_flush_locked(struct intel_batchbuffer *batch, struct intel_context *intel = batch->intel; int ret = 0; - dri_bo_unmap(batch->buf); + if (batch->buffer) + dri_bo_subdata (batch->buf, 0, used, batch->buffer); + else + dri_bo_unmap(batch->buf); batch->map = NULL; batch->ptr = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 5e8b14b401..d3c656c803 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -41,6 +41,8 @@ struct intel_batchbuffer dri_bo *buf; + GLubyte *buffer; + GLubyte *map; GLubyte *ptr; diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 399a6a12e3..2de08f6529 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -92,10 +92,10 @@ typedef struct _dri_bufmgr_gem { uint32_t max_relocs; - struct drm_i915_gem_validate_entry *validate_array; - dri_bo **validate_bo; - int validate_array_size; - int validate_count; + struct drm_i915_gem_exec_object *exec_objects; + dri_bo **exec_bos; + int exec_size; + int exec_count; /** Array of lists of cached gem objects of power-of-two sizes */ struct dri_gem_bo_bucket cache_bucket[INTEL_GEM_BO_BUCKETS]; @@ -166,8 +166,8 @@ static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem) { int i, j; - for (i = 0; i < bufmgr_gem->validate_count; i++) { - dri_bo *bo = bufmgr_gem->validate_bo[i]; + for (i = 0; i < bufmgr_gem->exec_count; i++) { + dri_bo *bo = bufmgr_gem->exec_bos[i]; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; if (bo_gem->relocs == NULL) { @@ -207,32 +207,32 @@ intel_add_validate_buffer(dri_bo *bo) return; /* Extend the array of validation entries as necessary. */ - if (bufmgr_gem->validate_count == bufmgr_gem->validate_array_size) { - int new_size = bufmgr_gem->validate_array_size * 2; + if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { + int new_size = bufmgr_gem->exec_size * 2; if (new_size == 0) new_size = 5; - bufmgr_gem->validate_array = - realloc(bufmgr_gem->validate_array, - sizeof(*bufmgr_gem->validate_array) * new_size); - bufmgr_gem->validate_bo = - realloc(bufmgr_gem->validate_bo, - sizeof(*bufmgr_gem->validate_bo) * new_size); - bufmgr_gem->validate_array_size = new_size; + bufmgr_gem->exec_objects = + realloc(bufmgr_gem->exec_objects, + sizeof(*bufmgr_gem->exec_objects) * new_size); + bufmgr_gem->exec_bos = + realloc(bufmgr_gem->exec_bos, + sizeof(*bufmgr_gem->exec_bos) * new_size); + bufmgr_gem->exec_size = new_size; } - index = bufmgr_gem->validate_count; + index = bufmgr_gem->exec_count; bo_gem->validate_index = index; /* Fill in array entry */ - bufmgr_gem->validate_array[index].buffer_handle = bo_gem->gem_handle; - bufmgr_gem->validate_array[index].relocation_count = bo_gem->reloc_count; - bufmgr_gem->validate_array[index].relocs_ptr = (uintptr_t)bo_gem->relocs; - bufmgr_gem->validate_array[index].alignment = 0; - bufmgr_gem->validate_array[index].buffer_offset = 0; - bufmgr_gem->validate_bo[index] = bo; + bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; + bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; + bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; + bufmgr_gem->exec_objects[index].alignment = 0; + bufmgr_gem->exec_objects[index].offset = 0; + bufmgr_gem->exec_bos[index] = bo; dri_bo_reference(bo); - bufmgr_gem->validate_count++; + bufmgr_gem->exec_count++; } @@ -309,13 +309,13 @@ dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, } if (!alloc_from_cache) { - struct drm_gem_alloc alloc; + struct drm_gem_create create; - memset(&alloc, 0, sizeof(alloc)); - alloc.size = bo_gem->bo.size; + memset(&create, 0, sizeof(create)); + create.size = bo_gem->bo.size; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_ALLOC, &alloc); - bo_gem->gem_handle = alloc.handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CREATE, &create); + bo_gem->gem_handle = create.handle; if (ret != 0) { free(bo_gem); return NULL; @@ -439,14 +439,14 @@ dri_gem_bo_unreference(dri_bo *bo) bucket->tail = &entry->next; bucket->num_entries++; } else { - struct drm_gem_unreference unref; + struct drm_gem_close close; - /* Decrement the kernel refcount for the buffer. */ - unref.handle = bo_gem->gem_handle; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_UNREFERENCE, &unref); + /* Close this object */ + close.handle = bo_gem->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); if (ret != 0) { fprintf(stderr, - "DRM_IOCTL_GEM_UNREFERENCE %d failed (%s): %s\n", + "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", bo_gem->gem_handle, bo_gem->name, strerror(-ret)); } } @@ -537,14 +537,62 @@ dri_gem_bo_unmap(dri_bo *bo) return 0; } +static int +dri_gem_bo_subdata (dri_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_pwrite pwrite; + int ret; + + memset (&pwrite, 0, sizeof (pwrite)); + pwrite.handle = bo_gem->gem_handle; + pwrite.offset = offset; + pwrite.size = size; + pwrite.data_ptr = (uint64_t) (uintptr_t) data; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_PWRITE, &pwrite); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, (int) offset, (int) size, + strerror (errno)); + } + return 0; +} + +static int +dri_gem_bo_get_subdata (dri_bo *bo, unsigned long offset, + unsigned long size, void *data) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_pread pread; + int ret; + + memset (&pread, 0, sizeof (pread)); + pread.handle = bo_gem->gem_handle; + pread.offset = offset; + pread.size = size; + pread.data_ptr = (uint64_t) (uintptr_t) data; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_PREAD, &pread); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, (int) offset, (int) size, + strerror (errno)); + } + return 0; +} + static void dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) { dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; int i; - free(bufmgr_gem->validate_array); - free(bufmgr_gem->validate_bo); + free(bufmgr_gem->exec_objects); + free(bufmgr_gem->exec_bos); /* Free any cached buffer objects we were going to reuse */ for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { @@ -552,7 +600,7 @@ dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) struct dri_gem_bo_bucket_entry *entry; while ((entry = bucket->head) != NULL) { - struct drm_gem_unreference unref; + struct drm_gem_close close; int ret; bucket->head = entry->next; @@ -560,11 +608,11 @@ dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) bucket->tail = &bucket->head; bucket->num_entries--; - /* Decrement the kernel refcount for the buffer. */ - unref.handle = entry->gem_handle; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_UNREFERENCE, &unref); + /* Close this object */ + close.handle = entry->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); if (ret != 0) { - fprintf(stderr, "DRM_IOCTL_GEM_UNREFERENCE failed: %s\n", + fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %s\n", strerror(-ret)); } @@ -655,9 +703,10 @@ dri_gem_process_reloc(dri_bo *batch_buf) */ intel_add_validate_buffer(batch_buf); - bufmgr_gem->exec_arg.buffers_ptr = (uintptr_t)bufmgr_gem->validate_array; - bufmgr_gem->exec_arg.buffer_count = bufmgr_gem->validate_count; - bufmgr_gem->exec_arg.batch_start_offset = bufmgr_gem->validate_count; + bufmgr_gem->exec_arg.buffers_ptr = (uintptr_t)bufmgr_gem->exec_objects; + bufmgr_gem->exec_arg.buffer_count = bufmgr_gem->exec_count; + bufmgr_gem->exec_arg.batch_start_offset = 0; + bufmgr_gem->exec_arg.batch_len = 0; /* written in intel_exec_ioctl */ return &bufmgr_gem->exec_arg; } @@ -667,16 +716,16 @@ intel_update_buffer_offsets (dri_bufmgr_gem *bufmgr_gem) { int i; - for (i = 0; i < bufmgr_gem->validate_count; i++) { - dri_bo *bo = bufmgr_gem->validate_bo[i]; + for (i = 0; i < bufmgr_gem->exec_count; i++) { + dri_bo *bo = bufmgr_gem->exec_bos[i]; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; /* Update the buffer offset */ - if (bufmgr_gem->validate_array[i].buffer_offset != bo->offset) { + if (bufmgr_gem->exec_objects[i].offset != bo->offset) { DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", bo_gem->gem_handle, bo_gem->name, bo->offset, - bufmgr_gem->validate_array[i].buffer_offset); - bo->offset = bufmgr_gem->validate_array[i].buffer_offset; + bufmgr_gem->exec_objects[i].offset); + bo->offset = bufmgr_gem->exec_objects[i].offset; } } } @@ -692,16 +741,16 @@ dri_gem_post_submit(dri_bo *batch_buf) if (bufmgr_gem->bufmgr.debug) dri_gem_dump_validation_list(bufmgr_gem); - for (i = 0; i < bufmgr_gem->validate_count; i++) { - dri_bo *bo = bufmgr_gem->validate_bo[i]; + for (i = 0; i < bufmgr_gem->exec_count; i++) { + dri_bo *bo = bufmgr_gem->exec_bos[i]; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; /* Disconnect the buffer from the validate list */ bo_gem->validate_index = -1; dri_bo_unreference(bo); - bufmgr_gem->validate_bo[i] = NULL; + bufmgr_gem->exec_bos[i] = NULL; } - bufmgr_gem->validate_count = 0; + bufmgr_gem->exec_count = 0; } /** @@ -762,6 +811,8 @@ intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.bo_unreference = dri_gem_bo_unreference; bufmgr_gem->bufmgr.bo_map = dri_gem_bo_map; bufmgr_gem->bufmgr.bo_unmap = dri_gem_bo_unmap; + bufmgr_gem->bufmgr.bo_subdata = dri_gem_bo_subdata; + bufmgr_gem->bufmgr.bo_get_subdata = dri_gem_bo_get_subdata; bufmgr_gem->bufmgr.destroy = dri_bufmgr_gem_destroy; bufmgr_gem->bufmgr.emit_reloc = dri_gem_emit_reloc; bufmgr_gem->bufmgr.process_relocs = dri_gem_process_reloc; -- cgit v1.2.3 From cba90d4a778ab5202f2f7547ad7ffeb84216d07b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 13 May 2008 11:30:18 -0700 Subject: [GEM] Actually include the presumed offset in initial relocations. This avoids kernel relocations for most batchbuffer relocs. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 2de08f6529..a14f7830f9 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -657,6 +657,7 @@ dri_gem_emit_reloc(dri_bo *bo, uint32_t read_domains, uint32_t write_domain, target_bo_gem->gem_handle; bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; + bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; dri_bo_reference(target_bo); -- cgit v1.2.3 From 8b49cc104dd556218fc769178b96f4a8a428d057 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 17 May 2008 23:34:47 -0700 Subject: [intel-gem] Don't calloc reloc buffers Only a few relocations are typically used, so don't clear the whole thing. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index a14f7830f9..e08e3952bc 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -245,9 +245,9 @@ intel_setup_reloc_list(dri_bo *bo) dri_bo_gem *bo_gem = (dri_bo_gem *)bo; dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - bo_gem->relocs = calloc(bufmgr_gem->max_relocs, + bo_gem->relocs = malloc(bufmgr_gem->max_relocs * sizeof(struct drm_i915_gem_relocation_entry)); - bo_gem->reloc_target_bo = calloc(bufmgr_gem->max_relocs, sizeof(dri_bo *)); + bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs * sizeof(dri_bo *)); return 0; } -- cgit v1.2.3 From 76286bc76c5ea2217378809a9dcab6794aae7b5e Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 22 May 2008 10:44:47 -0700 Subject: [intel-gem] Make sure set_domain is called often enough. The write_domain needs to be set after any batch buffer uses an object, track when that happens in the new 'cpu_domain_set' field. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 75 ++++++++++++++++----------- 1 file changed, 44 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index e08e3952bc..b472a8f6e1 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -117,6 +117,13 @@ typedef struct _dri_bo_gem { */ int validate_index; + /** + * Tracks whether set_domain to CPU is current + * Set when set_domain has been called + * Cleared when a batch has been submitted + */ + GLboolean cpu_domain_set; + /** Array passed to the DRM containing relocation information. */ struct drm_i915_gem_relocation_entry *relocs; /** Array of bos corresponding to relocs[i].target_handle */ @@ -472,40 +479,43 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) /* Allow recursive mapping. Mesa may recursively map buffers with * nested display loops. */ - if (bo_gem->map_count++ != 0) - return 0; - - assert(bo->virtual == NULL); - - DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); - - if (bo_gem->virtual == NULL) { - struct drm_gem_mmap mmap_arg; - - memset(&mmap_arg, 0, sizeof(mmap_arg)); - mmap_arg.handle = bo_gem->gem_handle; - mmap_arg.offset = 0; - mmap_arg.size = bo->size; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", - __FILE__, __LINE__, - bo_gem->gem_handle, bo_gem->name, strerror(errno)); + if (bo_gem->map_count++ == 0) { + + assert(bo->virtual == NULL); + + DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); + + if (bo_gem->virtual == NULL) { + struct drm_gem_mmap mmap_arg; + + memset(&mmap_arg, 0, sizeof(mmap_arg)); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.offset = 0; + mmap_arg.size = bo->size; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, bo_gem->name, strerror(errno)); + } + bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; } - bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; + bo->virtual = bo_gem->virtual; + DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->virtual); } - bo->virtual = bo_gem->virtual; - DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->virtual); - set_domain.handle = bo_gem->gem_handle; - set_domain.read_domains = DRM_GEM_DOMAIN_CPU; - set_domain.write_domain = write_enable ? DRM_GEM_DOMAIN_CPU : 0; - ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); - if (ret != 0) { - fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", - __FILE__, __LINE__, - bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, - strerror (errno)); + if (!bo_gem->cpu_domain_set) { + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = DRM_GEM_DOMAIN_CPU; + set_domain.write_domain = write_enable ? DRM_GEM_DOMAIN_CPU : 0; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, + strerror (errno)); + } + bo_gem->cpu_domain_set = GL_TRUE; } return 0; @@ -746,6 +756,9 @@ dri_gem_post_submit(dri_bo *batch_buf) dri_bo *bo = bufmgr_gem->exec_bos[i]; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + /* Need to call set_domain on next bo_map */ + bo_gem->cpu_domain_set = GL_FALSE; + /* Disconnect the buffer from the validate list */ bo_gem->validate_index = -1; dri_bo_unreference(bo); -- cgit v1.2.3 From 6cefae5354fb3015c5a14677071871613faa9c3a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 19 May 2008 15:42:00 -0700 Subject: Add back a mostly-correct glFinish for GEM and fake. The right solution would probably be keeping a list of regions which have been rendered to. --- src/mesa/drivers/dri/common/dri_bufmgr.c | 6 ++++++ src/mesa/drivers/dri/common/dri_bufmgr.h | 9 +++++++++ src/mesa/drivers/dri/intel/intel_bufmgr_fake.c | 10 +++++++--- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 21 +++++++++++++++++++++ src/mesa/drivers/dri/intel/intel_context.c | 7 ++++++- 5 files changed, 49 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 19ea2a8f86..be2a7b740c 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -127,6 +127,12 @@ dri_bo_get_subdata(dri_bo *bo, unsigned long offset, return 0; } +void +dri_bo_wait_rendering(dri_bo *bo) +{ + bo->bufmgr->bo_wait_rendering(bo); +} + void dri_bufmgr_destroy(dri_bufmgr *bufmgr) { diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h index 29f9aea2b1..1abca08cc8 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ b/src/mesa/drivers/dri/common/dri_bufmgr.h @@ -127,6 +127,14 @@ struct _dri_bufmgr { int (*bo_get_subdata) (dri_bo *bo, unsigned long offset, unsigned long size, void *data); + /** + * Waits for rendering to an object by the GPU to have completed. + * + * This is not required for any access to the BO by bo_map, bo_subdata, etc. + * It is merely a way for the driver to implement glFinish. + */ + void (*bo_wait_rendering) (dri_bo *bo); + /** * Tears down the buffer manager instance. */ @@ -192,6 +200,7 @@ int dri_bo_subdata(dri_bo *bo, unsigned long offset, unsigned long size, const void *data); int dri_bo_get_subdata(dri_bo *bo, unsigned long offset, unsigned long size, void *data); +void dri_bo_wait_rendering(dri_bo *bo); void dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug); void dri_bufmgr_destroy(dri_bufmgr *bufmgr); diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c index 5d01a471c5..2aed3d85be 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c @@ -533,10 +533,13 @@ dri_bufmgr_fake_wait_idle(dri_bufmgr_fake *bufmgr_fake) } /** - * Wait for execution pending on a buffer + * Wait for rendering to a buffer to complete. + * + * It is assumed that the bathcbuffer which performed the rendering included + * the necessary flushing. */ static void -dri_bufmgr_fake_bo_wait_idle(dri_bo *bo) +dri_fake_bo_wait_rendering(dri_bo *bo) { dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; dri_bo_fake *bo_fake = (dri_bo_fake *)bo; @@ -757,7 +760,7 @@ dri_fake_bo_map(dri_bo *bo, GLboolean write_enable) if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) && bo_fake->block->fenced) { - dri_bufmgr_fake_bo_wait_idle(bo); + dri_fake_bo_wait_rendering(bo); } bo->virtual = bo_fake->block->virtual; @@ -1157,6 +1160,7 @@ dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, bufmgr_fake->bufmgr.bo_unreference = dri_fake_bo_unreference; bufmgr_fake->bufmgr.bo_map = dri_fake_bo_map; bufmgr_fake->bufmgr.bo_unmap = dri_fake_bo_unmap; + bufmgr_fake->bufmgr.bo_wait_rendering = dri_fake_bo_wait_rendering; bufmgr_fake->bufmgr.destroy = dri_fake_destroy; bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index b472a8f6e1..8638d0af1a 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -595,6 +595,26 @@ dri_gem_bo_get_subdata (dri_bo *bo, unsigned long offset, return 0; } +static void +dri_gem_bo_wait_rendering(dri_bo *bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_set_domain set_domain; + int ret; + + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = DRM_GEM_DOMAIN_CPU; + set_domain.write_domain = 0; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, + strerror (errno)); + } +} + static void dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) { @@ -827,6 +847,7 @@ intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.bo_unmap = dri_gem_bo_unmap; bufmgr_gem->bufmgr.bo_subdata = dri_gem_bo_subdata; bufmgr_gem->bufmgr.bo_get_subdata = dri_gem_bo_get_subdata; + bufmgr_gem->bufmgr.bo_wait_rendering = dri_gem_bo_wait_rendering; bufmgr_gem->bufmgr.destroy = dri_bufmgr_gem_destroy; bufmgr_gem->bufmgr.emit_reloc = dri_gem_emit_reloc; bufmgr_gem->bufmgr.process_relocs = dri_gem_process_reloc; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index ae9e53ce6e..6f187f719b 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -375,7 +375,12 @@ intelFinish(GLcontext * ctx) intelFlush(ctx); for (i = 0; i < fb->_NumColorDrawBuffers; i++) { - /* XXX: Wait on buffer idle */ + struct intel_renderbuffer *irb; + + irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb->region) + dri_bo_wait_rendering(irb->region->buffer); } if (fb->_DepthBuffer) { /* XXX: Wait on buffer idle */ -- cgit v1.2.3 From a74bf4ef345d880d7d296313fed0240781d2ebd8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 23 May 2008 12:18:50 -0700 Subject: Emit a flush after the swapbuffers blit, so contents end up on the screen. Otherwise, since the MI_FLUSH at the end of every batch had been removed, non-automatic-flushing chips (965) wouldn't get flushed and apps with static rendering would get partial screen contents until the server's blockhandler flush kicked in. --- src/mesa/drivers/dri/i915/i830_reg.h | 4 ---- src/mesa/drivers/dri/i915/i915_reg.h | 6 ------ src/mesa/drivers/dri/i965/brw_defines.h | 1 - src/mesa/drivers/dri/intel/intel_blit.c | 8 ++++++++ src/mesa/drivers/dri/intel/intel_reg.h | 4 ++++ 5 files changed, 12 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index 41280bca7c..d1084a84c0 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -635,8 +635,4 @@ #define ENABLE_TEX_STREAM_MAP_IDX (1<<3) #define TEX_STREAM_MAP_IDX(x) (x) - -#define MI_FLUSH ((0<<29)|(4<<23)) -#define FLUSH_MAP_CACHE (1<<0) - #endif diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index b5585e70e7..b718b8610c 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -855,10 +855,4 @@ #define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) #define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) - -#define MI_FLUSH ((0<<29)|(4<<23)) -#define FLUSH_MAP_CACHE (1<<0) -#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) - - #endif diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 3aada8cab1..0fb531b1ee 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -38,7 +38,6 @@ #define MI_NOOP 0x00 #define MI_USER_INTERRUPT 0x02 #define MI_WAIT_FOR_EVENT 0x03 -#define MI_FLUSH 0x04 #define MI_REPORT_HEAD 0x07 #define MI_ARB_ON_OFF 0x08 #define MI_BATCH_BUFFER_END 0x0A diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 3d7f64e28b..a1f4997312 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -159,6 +159,14 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, ADVANCE_BATCH(); } + /* Emit a flush so that, on systems where we don't have automatic flushing + * set (such as 965), the results all land on the screen in a timely + * fashion. + */ + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + intel_batchbuffer_flush(intel->batch); } diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 37629c07e2..c12ccf4ae1 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -31,6 +31,10 @@ #define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +#define MI_FLUSH (CMD_MI | (4 << 23)) +#define FLUSH_MAP_CACHE (1 << 0) +#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) + /* Stalls command execution waiting for the given events to have occurred. */ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -- cgit v1.2.3 From 200fd13d4a605bea07857d6c3e7c85ee07d663cb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 23 May 2008 12:43:05 -0700 Subject: Remove stale comment about glFlush(). We don't need an MI_FLUSH there, because everything that's been flushed in the batch will eventually hit the hardware. --- src/mesa/drivers/dri/intel/intel_context.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 6f187f719b..e1941c302c 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -361,9 +361,6 @@ intelFlush(GLcontext * ctx) if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); - - /* XXX: Need to do an MI_FLUSH here. - */ } void -- cgit v1.2.3 From 8ba91b4636a04145e683e7d7fe5ee5ff404d73ac Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sun, 25 May 2008 20:49:53 -0700 Subject: [intel] Enable buffer re-use for gem Use the new DRM_IOCTL_I915_GEM_BUSY ioctl to detect idle buffers for re-use. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index 8638d0af1a..f561b71ebb 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -293,16 +293,11 @@ dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, /* Get a buffer out of the cache if available */ if (bucket != NULL && bucket->num_entries > 0) { struct dri_gem_bo_bucket_entry *entry = bucket->head; -#if 0 - int busy; - - /* XXX */ - /* Check if the buffer is still in flight. If not, reuse it. */ - ret = drmBOBusy(bufmgr_gem->fd, &entry->drm_bo, &busy); - alloc_from_cache = (ret == 0 && busy == 0); -#else - alloc_from_cache = 0; -#endif + struct drm_i915_gem_busy busy; + + busy.handle = entry->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + alloc_from_cache = (ret == 0 && busy.busy == 0); if (alloc_from_cache) { bucket->head = entry->next; @@ -797,14 +792,12 @@ dri_gem_post_submit(dri_bo *batch_buf) void intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr) { - /* dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; int i; for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { bufmgr_gem->cache_bucket[i].max_entries = -1; } - */ } /* -- cgit v1.2.3 From 924eaa2f955ecdc1080f5a8fdc165367a576a919 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 26 May 2008 00:19:20 -0700 Subject: [intel] all flushing in intelEmitCopyBlit Add both MI_FLUSH and intel_batchbuffer_flush to intelEmitCopyBlit. This ensures that the data are flushed *and* the gem kernel driver sees the various memory domain transitions. --- src/mesa/drivers/dri/intel/intel_blit.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index a1f4997312..e8d2ad0ae4 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -375,6 +375,10 @@ intelEmitCopyBlit(struct intel_context *intel, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } + BEGIN_BATCH(1, NO_LOOP_CLIPRECTS); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + intel_batchbuffer_flush(intel->batch); } -- cgit v1.2.3 From d8395f9d9eed4040d6fa12f1631dd7c372c73be4 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 26 May 2008 17:51:38 -0700 Subject: [intel-gem] Once mapped, leave buffers mapped. Mapping and unmapping buffers is expensive, and having the map around isn't harmful (other than consuming address space). So, once mapped, just leave buffers mapped in case they get re-used. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index f561b71ebb..f762b485f6 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -107,7 +107,7 @@ typedef struct _dri_bo_gem { dri_bo bo; int refcount; - unsigned int map_count; + GLboolean mapped; uint32_t gem_handle; const char *name; @@ -412,7 +412,8 @@ dri_gem_bo_unreference(dri_bo *bo) struct dri_gem_bo_bucket *bucket; int ret; - assert(bo_gem->map_count == 0); + if (bo_gem->mapped) + munmap (bo_gem->virtual, bo->size); if (bo_gem->relocs != NULL) { int i; @@ -474,7 +475,7 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) /* Allow recursive mapping. Mesa may recursively map buffers with * nested display loops. */ - if (bo_gem->map_count++ == 0) { + if (!bo_gem->mapped) { assert(bo->virtual == NULL); @@ -496,6 +497,7 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; } bo->virtual = bo_gem->virtual; + bo_gem->mapped = GL_TRUE; DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->virtual); } @@ -519,25 +521,12 @@ dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) static int dri_gem_bo_unmap(dri_bo *bo) { - dri_bufmgr_gem *bufmgr_gem; dri_bo_gem *bo_gem = (dri_bo_gem *)bo; if (bo == NULL) return 0; - assert(bo_gem->map_count != 0); - if (--bo_gem->map_count != 0) - return 0; - - bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - - assert(bo->virtual != NULL); - - DBG("bo_unmap: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); - - munmap(bo_gem->virtual, bo->size); - bo_gem->virtual = NULL; - bo->virtual = NULL; + assert(bo_gem->mapped); return 0; } -- cgit v1.2.3 From fccc427aac17b3fa17160332e6e6f3c2cef25ca5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 30 May 2008 15:12:15 -0700 Subject: [intel-gem] Remember last offset of reused BOs to avoid more kernel relocs. This is good for about 5% on ipers on 965, and should help any cpu-bound app. --- src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c index f762b485f6..3c1c3157e1 100644 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c @@ -65,6 +65,7 @@ struct intel_validate_entry { struct dri_gem_bo_bucket_entry { uint32_t gem_handle; + uint32_t last_offset; struct dri_gem_bo_bucket_entry *next; }; @@ -306,6 +307,7 @@ dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, bucket->num_entries--; bo_gem->gem_handle = entry->gem_handle; + bo_gem->bo.offset = entry->last_offset; free(entry); } } @@ -324,7 +326,6 @@ dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, } } - bo_gem->bo.offset = 0; bo_gem->bo.virtual = NULL; bo_gem->bo.bufmgr = bufmgr; bo_gem->name = name; @@ -436,6 +437,7 @@ dri_gem_bo_unreference(dri_bo *bo) entry = calloc(1, sizeof(*entry)); entry->gem_handle = bo_gem->gem_handle; + entry->last_offset = bo->offset; entry->next = NULL; *bucket->tail = entry; -- cgit v1.2.3 From 4b5b008d54e86ac4f0a2176429d062100978ca8c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 3 Jun 2008 14:43:48 -0700 Subject: [intel] Convert drivers to using libdrm bufmgr code. --- src/mesa/drivers/dri/Makefile.template | 4 - src/mesa/drivers/dri/common/dri_bufmgr.c | 171 --- src/mesa/drivers/dri/common/dri_bufmgr.h | 216 ---- src/mesa/drivers/dri/i915/Makefile | 5 +- src/mesa/drivers/dri/i915/intel_bufmgr_fake.c | 1 - src/mesa/drivers/dri/i915/intel_bufmgr_gem.c | 1 - src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c | 1 - src/mesa/drivers/dri/i965/Makefile | 3 - src/mesa/drivers/dri/i965/brw_cc.c | 12 +- src/mesa/drivers/dri/i965/brw_clip_state.c | 12 +- src/mesa/drivers/dri/i965/brw_curbe.c | 5 +- src/mesa/drivers/dri/i965/brw_draw_upload.c | 5 +- src/mesa/drivers/dri/i965/brw_gs_state.c | 10 +- src/mesa/drivers/dri/i965/brw_sf_state.c | 20 +- src/mesa/drivers/dri/i965/brw_state_cache.c | 5 +- src/mesa/drivers/dri/i965/brw_vs_state.c | 10 +- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 12 +- src/mesa/drivers/dri/i965/brw_wm_state.c | 32 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 34 +- src/mesa/drivers/dri/i965/intel_bufmgr_fake.c | 1 - src/mesa/drivers/dri/i965/intel_bufmgr_gem.c | 1 - src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c | 1 - src/mesa/drivers/dri/intel/intel_batchbuffer.c | 8 +- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 3 +- src/mesa/drivers/dri/intel/intel_bufmgr_fake.c | 1177 --------------------- src/mesa/drivers/dri/intel/intel_bufmgr_fake.h | 50 - src/mesa/drivers/dri/intel/intel_bufmgr_gem.c | 847 --------------- src/mesa/drivers/dri/intel/intel_bufmgr_gem.h | 16 - src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c | 1102 ------------------- src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h | 20 - src/mesa/drivers/dri/intel/intel_context.c | 19 +- src/mesa/drivers/dri/intel/intel_context.h | 1 + src/mesa/drivers/dri/intel/intel_ioctl.c | 2 +- src/mesa/drivers/dri/intel/intel_regions.c | 33 +- src/mesa/drivers/dri/intel/intel_screen.c | 2 +- 35 files changed, 105 insertions(+), 3737 deletions(-) delete mode 100644 src/mesa/drivers/dri/common/dri_bufmgr.c delete mode 100644 src/mesa/drivers/dri/common/dri_bufmgr.h delete mode 120000 src/mesa/drivers/dri/i915/intel_bufmgr_fake.c delete mode 120000 src/mesa/drivers/dri/i915/intel_bufmgr_gem.c delete mode 120000 src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c delete mode 120000 src/mesa/drivers/dri/i965/intel_bufmgr_fake.c delete mode 120000 src/mesa/drivers/dri/i965/intel_bufmgr_gem.c delete mode 120000 src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_fake.c delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_fake.h delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_gem.c delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_gem.h delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c delete mode 100644 src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h (limited to 'src') diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index cb41662707..864c6234c8 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -11,10 +11,6 @@ COMMON_SOURCES = \ ../common/xmlconfig.c \ ../common/drirenderbuffer.c -COMMON_BM_SOURCES = \ - ../common/dri_bufmgr.c - - ifeq ($(WINDOW_SYSTEM),dri) WINOBJ= WINLIB= diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c deleted file mode 100644 index be2a7b740c..0000000000 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright © 2007 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -#include -#include -#include -#include "mtypes.h" -#include "dri_bufmgr.h" - -/** @file dri_bufmgr.c - * - * Convenience functions for buffer management methods. - */ - -dri_bo * -dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size, - unsigned int alignment, uint64_t location_mask) -{ - assert((location_mask & ~(DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_MEM_TT | - DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_PRIV0 | - DRM_BO_FLAG_MEM_PRIV1 | DRM_BO_FLAG_MEM_PRIV2 | - DRM_BO_FLAG_MEM_PRIV3 | DRM_BO_FLAG_MEM_PRIV4 | - DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) == 0); - return bufmgr->bo_alloc(bufmgr, name, size, alignment, location_mask); -} - -dri_bo * -dri_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, unsigned long offset, - unsigned long size, void *virtual, - uint64_t location_mask) -{ - assert((location_mask & ~(DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_MEM_TT | - DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_PRIV0 | - DRM_BO_FLAG_MEM_PRIV1 | DRM_BO_FLAG_MEM_PRIV2 | - DRM_BO_FLAG_MEM_PRIV3 | - DRM_BO_FLAG_MEM_PRIV4)) == 0); - - return bufmgr->bo_alloc_static(bufmgr, name, offset, size, virtual, - location_mask); -} - -void -dri_bo_reference(dri_bo *bo) -{ - bo->bufmgr->bo_reference(bo); -} - -void -dri_bo_unreference(dri_bo *bo) -{ - if (bo == NULL) - return; - - bo->bufmgr->bo_unreference(bo); -} - -int -dri_bo_map(dri_bo *buf, GLboolean write_enable) -{ - return buf->bufmgr->bo_map(buf, write_enable); -} - -int -dri_bo_unmap(dri_bo *buf) -{ - return buf->bufmgr->bo_unmap(buf); -} - -int -dri_bo_subdata(dri_bo *bo, unsigned long offset, - unsigned long size, const void *data) -{ - int ret; - if (bo->bufmgr->bo_subdata) - return bo->bufmgr->bo_subdata(bo, offset, size, data); - if (size == 0 || data == NULL) - return 0; - - ret = dri_bo_map(bo, GL_TRUE); - if (ret) - return ret; - memcpy((unsigned char *)bo->virtual + offset, data, size); - dri_bo_unmap(bo); - return 0; -} - -int -dri_bo_get_subdata(dri_bo *bo, unsigned long offset, - unsigned long size, void *data) -{ - int ret; - if (bo->bufmgr->bo_subdata) - return bo->bufmgr->bo_get_subdata(bo, offset, size, data); - - if (size == 0 || data == NULL) - return 0; - - ret = dri_bo_map(bo, GL_FALSE); - if (ret) - return ret; - memcpy(data, (unsigned char *)bo->virtual + offset, size); - dri_bo_unmap(bo); - return 0; -} - -void -dri_bo_wait_rendering(dri_bo *bo) -{ - bo->bufmgr->bo_wait_rendering(bo); -} - -void -dri_bufmgr_destroy(dri_bufmgr *bufmgr) -{ - bufmgr->destroy(bufmgr); -} - - -int dri_emit_reloc(dri_bo *reloc_buf, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta, uint32_t offset, dri_bo *target_buf) -{ - return reloc_buf->bufmgr->emit_reloc(reloc_buf, read_domains, write_domain, - delta, offset, target_buf); -} - -void *dri_process_relocs(dri_bo *batch_buf) -{ - return batch_buf->bufmgr->process_relocs(batch_buf); -} - -void dri_post_submit(dri_bo *batch_buf) -{ - batch_buf->bufmgr->post_submit(batch_buf); -} - -void -dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug) -{ - bufmgr->debug = enable_debug; -} - -int -dri_bufmgr_check_aperture_space(dri_bo *bo) -{ - return bo->bufmgr->check_aperture_space(bo); -} diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.h b/src/mesa/drivers/dri/common/dri_bufmgr.h deleted file mode 100644 index 1abca08cc8..0000000000 --- a/src/mesa/drivers/dri/common/dri_bufmgr.h +++ /dev/null @@ -1,216 +0,0 @@ -/************************************************************************** - * - * Copyright © 2007 Intel Corporation - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström - * Keith Whitwell - * Eric Anholt - */ - -#ifndef _DRI_BUFMGR_H_ -#define _DRI_BUFMGR_H_ -#include - -typedef struct _dri_bufmgr dri_bufmgr; -typedef struct _dri_bo dri_bo; - -struct _dri_bo { - /** - * Size in bytes of the buffer object. - * - * The size may be larger than the size originally requested for the - * allocation, such as being aligned to page size. - */ - unsigned long size; - /** - * Card virtual address (offset from the beginning of the aperture) for the - * object. Only valid while validated. - */ - unsigned long offset; - /** - * Virtual address for accessing the buffer data. Only valid while mapped. - */ - void *virtual; - /** Buffer manager context associated with this buffer object */ - dri_bufmgr *bufmgr; -}; - -/** - * Context for a buffer manager instance. - * - * Contains public methods followed by private storage for the buffer manager. - */ -struct _dri_bufmgr { - /** - * Allocate a buffer object. - * - * Buffer objects are not necessarily initially mapped into CPU virtual - * address space or graphics device aperture. They must be mapped using - * bo_map() to be used by the CPU, and validated for use using bo_validate() - * to be used from the graphics device. - */ - dri_bo *(*bo_alloc)(dri_bufmgr *bufmgr_ctx, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask); - - /** - * Allocates a buffer object for a static allocation. - * - * Static allocations are ones such as the front buffer that are offered by - * the X Server, which are never evicted and never moved. - */ - dri_bo *(*bo_alloc_static)(dri_bufmgr *bufmgr_ctx, const char *name, - unsigned long offset, unsigned long size, - void *virtual, uint64_t location_mask); - - /** Takes a reference on a buffer object */ - void (*bo_reference)(dri_bo *bo); - - /** - * Releases a reference on a buffer object, freeing the data if - * rerefences remain. - */ - void (*bo_unreference)(dri_bo *bo); - - /** - * Maps the buffer into userspace. - * - * This function will block waiting for any existing execution on the - * buffer to complete, first. The resulting mapping is available at - * buf->virtual. - */ - int (*bo_map)(dri_bo *buf, GLboolean write_enable); - - /** Reduces the refcount on the userspace mapping of the buffer object. */ - int (*bo_unmap)(dri_bo *buf); - - /** - * Write data into an object. - * - * This is an optional function, if missing, - * dri_bo will map/memcpy/unmap. - */ - int (*bo_subdata) (dri_bo *buf, unsigned long offset, - unsigned long size, const void *data); - - /** - * Read data from an object - * - * This is an optional function, if missing, - * dri_bo will map/memcpy/unmap. - */ - int (*bo_get_subdata) (dri_bo *bo, unsigned long offset, - unsigned long size, void *data); - - /** - * Waits for rendering to an object by the GPU to have completed. - * - * This is not required for any access to the BO by bo_map, bo_subdata, etc. - * It is merely a way for the driver to implement glFinish. - */ - void (*bo_wait_rendering) (dri_bo *bo); - - /** - * Tears down the buffer manager instance. - */ - void (*destroy)(dri_bufmgr *bufmgr); - - /** - * Add relocation entry in reloc_buf, which will be updated with the - * target buffer's real offset on on command submission. - * - * Relocations remain in place for the lifetime of the buffer object. - * - * \param reloc_buf Buffer to write the relocation into. - * \param flags BO flags to be used in validating the target buffer. - * Applicable flags include: - * - DRM_BO_FLAG_READ: The buffer will be read in the process of - * command execution. - * - DRM_BO_FLAG_WRITE: The buffer will be written in the process of - * command execution. - * - DRM_BO_FLAG_MEM_TT: The buffer should be validated in TT memory. - * - DRM_BO_FLAG_MEM_VRAM: The buffer should be validated in video - * memory. - * \param delta Constant value to be added to the relocation target's offset. - * \param offset Byte offset within batch_buf of the relocated pointer. - * \param target Buffer whose offset should be written into the relocation - * entry. - */ - int (*emit_reloc)(dri_bo *reloc_buf, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta, uint32_t offset, dri_bo *target); - - /** - * Processes the relocations, either in userland or by converting the list - * for use in batchbuffer submission. - * - * Kernel-based implementations will return a pointer to the arguments - * to be handed with batchbuffer submission to the kernel. The userland - * implementation performs the buffer validation and emits relocations - * into them the appopriate order. - * - * \param batch_buf buffer at the root of the tree of relocations - * \return argument to be completed and passed to the execbuffers ioctl - * (if any). - */ - void *(*process_relocs)(dri_bo *batch_buf); - - void (*post_submit)(dri_bo *batch_buf); - - int (*check_aperture_space)(dri_bo *bo); - GLboolean debug; /**< Enables verbose debugging printouts */ -}; - -dri_bo *dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size, - unsigned int alignment, uint64_t location_mask); -dri_bo *dri_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, - void *virtual, uint64_t location_mask); -void dri_bo_reference(dri_bo *bo); -void dri_bo_unreference(dri_bo *bo); -int dri_bo_map(dri_bo *buf, GLboolean write_enable); -int dri_bo_unmap(dri_bo *buf); - -int dri_bo_subdata(dri_bo *bo, unsigned long offset, - unsigned long size, const void *data); -int dri_bo_get_subdata(dri_bo *bo, unsigned long offset, - unsigned long size, void *data); -void dri_bo_wait_rendering(dri_bo *bo); - -void dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug); -void dri_bufmgr_destroy(dri_bufmgr *bufmgr); - -int dri_emit_reloc(dri_bo *reloc_buf, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta, uint32_t offset, dri_bo *target_buf); -void *dri_process_relocs(dri_bo *batch_buf); -void dri_post_process_relocs(dri_bo *batch_buf); -void dri_post_submit(dri_bo *batch_buf); -int dri_bufmgr_check_aperture_space(dri_bo *bo); - -#endif diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 476814c4ec..74f6169b2e 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -53,13 +53,10 @@ DRIVER_SOURCES = \ intel_state.c \ intel_tris.c \ intel_fbo.c \ - intel_depthstencil.c \ - intel_bufmgr_fake.c \ - intel_bufmgr_gem.c + intel_depthstencil.c C_SOURCES = \ $(COMMON_SOURCES) \ - $(COMMON_BM_SOURCES) \ $(DRIVER_SOURCES) ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i915/intel_bufmgr_fake.c b/src/mesa/drivers/dri/i915/intel_bufmgr_fake.c deleted file mode 120000 index 9b840a8123..0000000000 --- a/src/mesa/drivers/dri/i915/intel_bufmgr_fake.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_bufmgr_fake.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_bufmgr_gem.c b/src/mesa/drivers/dri/i915/intel_bufmgr_gem.c deleted file mode 120000 index dee0daf9c0..0000000000 --- a/src/mesa/drivers/dri/i915/intel_bufmgr_gem.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_bufmgr_gem.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c deleted file mode 120000 index e9df5c6279..0000000000 --- a/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_bufmgr_ttm.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 001f63ba12..c15418df06 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -9,8 +9,6 @@ DRIVER_SOURCES = \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ - intel_bufmgr_fake.c \ - intel_bufmgr_gem.c \ intel_context.c \ intel_decode.c \ intel_depthstencil.c \ @@ -85,7 +83,6 @@ DRIVER_SOURCES = \ C_SOURCES = \ $(COMMON_SOURCES) \ - $(COMMON_BM_SOURCES) \ $(MINIGLX_SOURCES) \ $(DRIVER_SOURCES) diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index b9338db0f5..afcfbcccb9 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -256,12 +256,12 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) NULL, NULL); /* Emit CC viewport relocation */ - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, - 0, - 0, - offsetof(struct brw_cc_unit_state, cc4), - brw->cc.vp_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, + 0, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); return bo; } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 26c322672c..fd5157bdb7 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -119,12 +119,12 @@ clip_unit_create_from_key(struct brw_context *brw, /* Emit clip program relocation */ assert(brw->clip.prog_bo); - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, - 0, - clip.thread0.grf_reg_count << 1, - offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, + 0, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); return bo; } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 1b5e22f130..bd0b04c36f 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -306,10 +306,7 @@ static int prepare_constant_buffer(struct brw_context *brw) * They're generally around 64b. */ brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", - 4096, 1 << 6, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 5222d2e450..026c8ed898 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -247,10 +247,7 @@ static void wrap_buffers( struct brw_context *brw, if (brw->vb.upload.bo != NULL) dri_bo_unreference(brw->vb.upload.bo); brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", - size, 1, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + size, 1); /* Set the internal VBO\ to no-backing-store. We only use them as a * temporary within a brw_try_draw_prims while the lock is held. diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 2bf86f5573..953ccf777f 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -106,11 +106,11 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (key->prog_active) { /* Emit GS program relocation */ - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - gs.thread0.grf_reg_count << 1, - offsetof(struct brw_gs_unit_state, thread0), - brw->gs.prog_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); } return bo; diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 5cf3228486..e8f36718a3 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -253,18 +253,18 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, NULL, NULL); /* Emit SF program relocation */ - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - sf.thread0.grf_reg_count << 1, - offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); /* Emit SF viewport relocation */ - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), - offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); return bo; } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d617650fad..fc0c3bd9ff 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -214,10 +214,7 @@ brw_upload_cache( struct brw_cache *cache, /* Create the buffer object to contain the data */ bo = dri_bo_alloc(cache->brw->intel.bufmgr, - cache->name[cache_id], data_size, 1 << 6, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + cache->name[cache_id], data_size, 1 << 6); /* Set up the memory containing the key, aux_data, and reloc_bufs */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 73f52d7428..a6b3db69ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -115,11 +115,11 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) NULL, NULL); /* Emit VS program relocation */ - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - vs.thread0.grf_reg_count << 1, - offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); return bo; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 13f7f21800..2e0aff7ab2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -305,12 +305,12 @@ static int upload_wm_samplers( struct brw_context *brw ) continue; ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]); - dri_emit_reloc(brw->wm.sampler_bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - 0, - i * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - brw->wm.sdc_bo[i]); + intel_bo_emit_reloc(brw->wm.sampler_bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index f79b58ba7a..ef78d71bbb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -199,28 +199,28 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, NULL, NULL); /* Emit WM program relocation */ - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - wm.thread0.grf_reg_count << 1, - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); /* Emit scratch space relocation */ if (key->total_scratch != 0) { - dri_emit_reloc(bo, - 0, 0, - wm.thread2.per_thread_scratch_space, - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_buffer); + intel_bo_emit_reloc(bo, + 0, 0, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_buffer); } /* Emit sampler state relocation */ if (key->sampler_count != 0) { - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), - offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); } return bo; @@ -251,7 +251,7 @@ static int upload_wm_unit( struct brw_context *brw ) brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, "wm scratch", total, - 4096, DRM_BO_FLAG_MEM_TT); + 4096); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 73f4b2b4a3..6fc6d9dfd8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -203,11 +203,11 @@ brw_create_texture_surface( struct brw_context *brw, NULL, NULL); /* Emit relocation to surface contents */ - dri_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); + intel_bo_emit_reloc(bo, + DRM_GEM_DOMAIN_I915_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); return bo; } @@ -341,13 +341,13 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * them both. We might be able to figure out from other state * a more restrictive relocation to emit. */ - dri_emit_reloc(brw->wm.surf_bo[unit], - DRM_GEM_DOMAIN_I915_RENDER | - DRM_GEM_DOMAIN_I915_SAMPLER, - DRM_GEM_DOMAIN_I915_RENDER, - 0, - offsetof(struct brw_surface_state, ss1), - region_bo); + intel_bo_emit_reloc(brw->wm.surf_bo[unit], + DRM_GEM_DOMAIN_I915_RENDER | + DRM_GEM_DOMAIN_I915_SAMPLER, + DRM_GEM_DOMAIN_I915_RENDER, + 0, + offsetof(struct brw_surface_state, ss1), + region_bo); } } @@ -391,11 +391,11 @@ brw_wm_get_binding_table(struct brw_context *brw) /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_WM_MAX_SURF; i++) { if (brw->wm.surf_bo[i] != NULL) { - dri_emit_reloc(bind_bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, - 0, - i * sizeof(GLuint), - brw->wm.surf_bo[i]); + intel_bo_emit_reloc(bind_bo, + DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); } } diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_fake.c b/src/mesa/drivers/dri/i965/intel_bufmgr_fake.c deleted file mode 120000 index 9b840a8123..0000000000 --- a/src/mesa/drivers/dri/i965/intel_bufmgr_fake.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_bufmgr_fake.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c deleted file mode 120000 index dee0daf9c0..0000000000 --- a/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_bufmgr_gem.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c deleted file mode 120000 index e9df5c6279..0000000000 --- a/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_bufmgr_ttm.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 803ff5e90e..019880581a 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -29,6 +29,7 @@ #include "intel_ioctl.h" #include "intel_decode.h" #include "intel_reg.h" +#include "intel_bufmgr.h" /* Relocations in kernel space: * - pass dma buffer seperately @@ -82,8 +83,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->buffer = malloc (intel->maxBatchSize); batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer", - intel->maxBatchSize, 4096, - DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); + intel->maxBatchSize, 4096); if (batch->buffer) batch->map = batch->buffer; else { @@ -290,8 +290,8 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, if (batch->ptr - batch->map > batch->buf->size) _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); - ret = dri_emit_reloc(batch->buf, read_domains, write_domain, - delta, batch->ptr - batch->map, buffer); + ret = intel_bo_emit_reloc(batch->buf, read_domains, write_domain, + delta, batch->ptr - batch->map, buffer); /* * Using the old buffer offset, write in what the right data would be, in case diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 951b8cbfb7..4227f0c973 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -45,8 +45,7 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel, struct intel_buffer_object *intel_obj) { intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64, - DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED); + intel_obj->Base.Size, 64); } /** diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c deleted file mode 100644 index 2aed3d85be..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.c +++ /dev/null @@ -1,1177 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Originally a fake version of the buffer manager so that we can - * prototype the changes in a driver fairly quickly, has been fleshed - * out to a fully functional interim solution. - * - * Basically wraps the old style memory management in the new - * programming interface, but is more expressive and avoids many of - * the bugs in the old texture manager. - */ -#include "mtypes.h" -#include "dri_bufmgr.h" -#include "intel_bufmgr_fake.h" -#include "drm.h" -#include "i915_drm.h" - -#include "simple_list.h" -#include "mm.h" -#include "imports.h" - -#define DBG(...) do { \ - if (bufmgr_fake->bufmgr.debug) \ - _mesa_printf(__VA_ARGS__); \ -} while (0) - -/* Internal flags: - */ -#define BM_NO_BACKING_STORE 0x00000001 -#define BM_NO_FENCE_SUBDATA 0x00000002 -#define BM_PINNED 0x00000004 - -/* Wrapper around mm.c's mem_block, which understands that you must - * wait for fences to expire before memory can be freed. This is - * specific to our use of memcpy for uploads - an upload that was - * processed through the command queue wouldn't need to care about - * fences. - */ -#define MAX_RELOCS 4096 - -struct fake_buffer_reloc -{ - /** Buffer object that the relocation points at. */ - dri_bo *target_buf; - /** Offset of the relocation entry within reloc_buf. */ - GLuint offset; - /** Cached value of the offset when we last performed this relocation. */ - GLuint last_target_offset; - /** Value added to target_buf's offset to get the relocation entry. */ - GLuint delta; - /** Cache domains the target buffer is read into. */ - uint32_t read_domains; - /** Cache domain the target buffer will have dirty cachelines in. */ - uint32_t write_domain; -}; - -struct block { - struct block *next, *prev; - struct mem_block *mem; /* BM_MEM_AGP */ - - /** - * Marks that the block is currently in the aperture and has yet to be - * fenced. - */ - unsigned on_hardware:1; - /** - * Marks that the block is currently fenced (being used by rendering) and - * can't be freed until @fence is passed. - */ - unsigned fenced:1; - - /** Fence cookie for the block. */ - unsigned fence; /* Split to read_fence, write_fence */ - - dri_bo *bo; - void *virtual; -}; - -typedef struct _bufmgr_fake { - dri_bufmgr bufmgr; - - unsigned long low_offset; - unsigned long size; - void *virtual; - - struct mem_block *heap; - struct block lru; /* only allocated, non-fence-pending blocks here */ - - unsigned buf_nr; /* for generating ids */ - - struct block on_hardware; /* after bmValidateBuffers */ - struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ - /* then to bufmgr->lru or free() */ - - unsigned int last_fence; - - unsigned fail:1; - unsigned need_fence:1; - GLboolean thrashing; - - /** - * Driver callback to emit a fence, returning the cookie. - * - * Currently, this also requires that a write flush be emitted before - * emitting the fence, but this should change. - */ - unsigned int (*fence_emit)(void *private); - /** Driver callback to wait for a fence cookie to have passed. */ - int (*fence_wait)(void *private, unsigned int fence_cookie); - /** Driver-supplied argument to driver callbacks */ - void *driver_priv; - - GLboolean debug; - - GLboolean performed_rendering; - - /* keep track of the current total size of objects we have relocs for */ - unsigned long current_total_size; -} dri_bufmgr_fake; - -typedef struct _dri_bo_fake { - dri_bo bo; - - unsigned id; /* debug only */ - const char *name; - - unsigned dirty:1; - unsigned size_accounted:1; /*this buffers size has been accounted against the aperture */ - unsigned card_dirty:1; /* has the card written to this buffer - we make need to copy it back */ - unsigned int refcount; - /* Flags may consist of any of the DRM_BO flags, plus - * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the first two - * driver private flags. - */ - uint64_t flags; - /** Cache domains the target buffer is read into. */ - uint32_t read_domains; - /** Cache domain the target buffer will have dirty cachelines in. */ - uint32_t write_domain; - - unsigned int alignment; - GLboolean is_static, validated; - unsigned int map_count; - - /** relocation list */ - struct fake_buffer_reloc *relocs; - GLuint nr_relocs; - - struct block *block; - void *backing_store; - void (*invalidate_cb)(dri_bo *bo, void *ptr); - void *invalidate_ptr; -} dri_bo_fake; - -static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, - unsigned int fence_cookie); - -static int dri_fake_check_aperture_space(dri_bo *bo); - -#define MAXFENCE 0x7fffffff - -static GLboolean FENCE_LTE( unsigned a, unsigned b ) -{ - if (a == b) - return GL_TRUE; - - if (a < b && b - a < (1<<24)) - return GL_TRUE; - - if (a > b && MAXFENCE - a + b < (1<<24)) - return GL_TRUE; - - return GL_FALSE; -} - -static unsigned int -_fence_emit_internal(dri_bufmgr_fake *bufmgr_fake) -{ - bufmgr_fake->last_fence = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); - return bufmgr_fake->last_fence; -} - -static void -_fence_wait_internal(dri_bufmgr_fake *bufmgr_fake, unsigned int cookie) -{ - int ret; - - ret = bufmgr_fake->fence_wait(bufmgr_fake->driver_priv, cookie); - if (ret != 0) { - _mesa_printf("%s:%d: Error %d waiting for fence.\n", - __FILE__, __LINE__); - abort(); - } - clear_fenced(bufmgr_fake, cookie); -} - -static GLboolean -_fence_test(dri_bufmgr_fake *bufmgr_fake, unsigned fence) -{ - /* Slight problem with wrap-around: - */ - return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence); -} - -/** - * Allocate a memory manager block for the buffer. - */ -static GLboolean -alloc_block(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - dri_bufmgr_fake *bufmgr_fake= (dri_bufmgr_fake *)bo->bufmgr; - struct block *block = (struct block *)calloc(sizeof *block, 1); - unsigned int align_log2 = _mesa_ffs(bo_fake->alignment) - 1; - GLuint sz; - - if (!block) - return GL_FALSE; - - sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); - - block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0); - if (!block->mem) { - free(block); - return GL_FALSE; - } - - make_empty_list(block); - - /* Insert at head or at tail??? - */ - insert_at_tail(&bufmgr_fake->lru, block); - - block->virtual = bufmgr_fake->virtual + - block->mem->ofs - bufmgr_fake->low_offset; - block->bo = bo; - - bo_fake->block = block; - - return GL_TRUE; -} - -/* Release the card storage associated with buf: - */ -static void free_block(dri_bufmgr_fake *bufmgr_fake, struct block *block) -{ - dri_bo_fake *bo_fake; - DBG("free block %p %08x %d %d\n", block, block->mem->ofs, block->on_hardware, block->fenced); - - if (!block) - return; - - bo_fake = (dri_bo_fake *)block->bo; - if (!(bo_fake->flags & BM_NO_BACKING_STORE) && (bo_fake->card_dirty == 1)) { - memcpy(bo_fake->backing_store, block->virtual, block->bo->size); - bo_fake->card_dirty = 1; - bo_fake->dirty = 1; - } - - if (block->on_hardware) { - block->bo = NULL; - } - else if (block->fenced) { - block->bo = NULL; - } - else { - DBG(" - free immediately\n"); - remove_from_list(block); - - mmFreeMem(block->mem); - free(block); - } -} - -static void -alloc_backing_store(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - assert(!bo_fake->backing_store); - assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); - - bo_fake->backing_store = ALIGN_MALLOC(bo->size, 64); - - DBG("alloc_backing - buf %d %p %d\n", bo_fake->id, bo_fake->backing_store, bo->size); - assert(bo_fake->backing_store); -} - -static void -free_backing_store(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->backing_store) { - assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); - ALIGN_FREE(bo_fake->backing_store); - bo_fake->backing_store = NULL; - } -} - -static void -set_dirty(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL) - bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr); - - assert(!(bo_fake->flags & BM_PINNED)); - - DBG("set_dirty - buf %d\n", bo_fake->id); - bo_fake->dirty = 1; -} - -static GLboolean -evict_lru(dri_bufmgr_fake *bufmgr_fake, GLuint max_fence) -{ - struct block *block, *tmp; - - DBG("%s\n", __FUNCTION__); - - foreach_s(block, tmp, &bufmgr_fake->lru) { - dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; - - if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) - continue; - - if (block->fence && max_fence && !FENCE_LTE(block->fence, max_fence)) - return 0; - - set_dirty(&bo_fake->bo); - bo_fake->block = NULL; - - free_block(bufmgr_fake, block); - return GL_TRUE; - } - - return GL_FALSE; -} - -#define foreach_s_rev(ptr, t, list) \ - for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) - -static GLboolean -evict_mru(dri_bufmgr_fake *bufmgr_fake) -{ - struct block *block, *tmp; - - DBG("%s\n", __FUNCTION__); - - foreach_s_rev(block, tmp, &bufmgr_fake->lru) { - dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; - - if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) - continue; - - set_dirty(&bo_fake->bo); - bo_fake->block = NULL; - - free_block(bufmgr_fake, block); - return GL_TRUE; - } - - return GL_FALSE; -} - -/** - * Removes all objects from the fenced list older than the given fence. - */ -static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, - unsigned int fence_cookie) -{ - struct block *block, *tmp; - int ret = 0; - - foreach_s(block, tmp, &bufmgr_fake->fenced) { - assert(block->fenced); - - if (_fence_test(bufmgr_fake, block->fence)) { - - block->fenced = 0; - - if (!block->bo) { - DBG("delayed free: offset %x sz %x\n", - block->mem->ofs, block->mem->size); - remove_from_list(block); - mmFreeMem(block->mem); - free(block); - } - else { - DBG("return to lru: offset %x sz %x\n", - block->mem->ofs, block->mem->size); - move_to_tail(&bufmgr_fake->lru, block); - } - - ret = 1; - } - else { - /* Blocks are ordered by fence, so if one fails, all from - * here will fail also: - */ - DBG("fence not passed: offset %x sz %x %d %d \n", - block->mem->ofs, block->mem->size, block->fence, bufmgr_fake->last_fence); - break; - } - } - - DBG("%s: %d\n", __FUNCTION__, ret); - return ret; -} - -static void fence_blocks(dri_bufmgr_fake *bufmgr_fake, unsigned fence) -{ - struct block *block, *tmp; - - foreach_s (block, tmp, &bufmgr_fake->on_hardware) { - DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n", block, - block->mem->size, block->mem->ofs, block->bo, fence); - block->fence = fence; - - block->on_hardware = 0; - block->fenced = 1; - - /* Move to tail of pending list here - */ - move_to_tail(&bufmgr_fake->fenced, block); - } - - assert(is_empty_list(&bufmgr_fake->on_hardware)); -} - -static GLboolean evict_and_alloc_block(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - assert(bo_fake->block == NULL); - - /* Search for already free memory: - */ - if (alloc_block(bo)) - return GL_TRUE; - - /* If we're not thrashing, allow lru eviction to dig deeper into - * recently used textures. We'll probably be thrashing soon: - */ - if (!bufmgr_fake->thrashing) { - while (evict_lru(bufmgr_fake, 0)) - if (alloc_block(bo)) - return GL_TRUE; - } - - /* Keep thrashing counter alive? - */ - if (bufmgr_fake->thrashing) - bufmgr_fake->thrashing = 20; - - /* Wait on any already pending fences - here we are waiting for any - * freed memory that has been submitted to hardware and fenced to - * become available: - */ - while (!is_empty_list(&bufmgr_fake->fenced)) { - GLuint fence = bufmgr_fake->fenced.next->fence; - _fence_wait_internal(bufmgr_fake, fence); - - if (alloc_block(bo)) - return GL_TRUE; - } - - if (!is_empty_list(&bufmgr_fake->on_hardware)) { - while (!is_empty_list(&bufmgr_fake->fenced)) { - GLuint fence = bufmgr_fake->fenced.next->fence; - _fence_wait_internal(bufmgr_fake, fence); - } - - if (!bufmgr_fake->thrashing) { - DBG("thrashing\n"); - } - bufmgr_fake->thrashing = 20; - - if (alloc_block(bo)) - return GL_TRUE; - } - - while (evict_mru(bufmgr_fake)) - if (alloc_block(bo)) - return GL_TRUE; - - DBG("%s 0x%x bytes failed\n", __FUNCTION__, bo->size); - - return GL_FALSE; -} - -/*********************************************************************** - * Public functions - */ - -/** - * Wait for hardware idle by emitting a fence and waiting for it. - */ -static void -dri_bufmgr_fake_wait_idle(dri_bufmgr_fake *bufmgr_fake) -{ - unsigned int cookie; - - cookie = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); - _fence_wait_internal(bufmgr_fake, cookie); -} - -/** - * Wait for rendering to a buffer to complete. - * - * It is assumed that the bathcbuffer which performed the rendering included - * the necessary flushing. - */ -static void -dri_fake_bo_wait_rendering(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->block == NULL || !bo_fake->block->fenced) - return; - - _fence_wait_internal(bufmgr_fake, bo_fake->block->fence); -} - -/* Specifically ignore texture memory sharing. - * -- just evict everything - * -- and wait for idle - */ -void -dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - struct block *block, *tmp; - - bufmgr_fake->need_fence = 1; - bufmgr_fake->fail = 0; - - /* Wait for hardware idle. We don't know where acceleration has been - * happening, so we'll need to wait anyway before letting anything get - * put on the card again. - */ - dri_bufmgr_fake_wait_idle(bufmgr_fake); - - /* Check that we hadn't released the lock without having fenced the last - * set of buffers. - */ - assert(is_empty_list(&bufmgr_fake->fenced)); - assert(is_empty_list(&bufmgr_fake->on_hardware)); - - foreach_s(block, tmp, &bufmgr_fake->lru) { - assert(_fence_test(bufmgr_fake, block->fence)); - set_dirty(block->bo); - } -} - -static dri_bo * -dri_fake_bo_alloc(dri_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask) -{ - dri_bufmgr_fake *bufmgr_fake; - dri_bo_fake *bo_fake; - - bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - - assert(size != 0); - - bo_fake = calloc(1, sizeof(*bo_fake)); - if (!bo_fake) - return NULL; - - bo_fake->bo.size = size; - bo_fake->bo.offset = -1; - bo_fake->bo.virtual = NULL; - bo_fake->bo.bufmgr = bufmgr; - bo_fake->refcount = 1; - - /* Alignment must be a power of two */ - assert((alignment & (alignment - 1)) == 0); - if (alignment == 0) - alignment = 1; - bo_fake->alignment = alignment; - bo_fake->id = ++bufmgr_fake->buf_nr; - bo_fake->name = name; - bo_fake->flags = 0; - bo_fake->is_static = GL_FALSE; - - DBG("drm_bo_alloc: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - return &bo_fake->bo; -} - -static dri_bo * -dri_fake_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, - void *virtual, uint64_t location_mask) -{ - dri_bufmgr_fake *bufmgr_fake; - dri_bo_fake *bo_fake; - - bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - - assert(size != 0); - - bo_fake = calloc(1, sizeof(*bo_fake)); - if (!bo_fake) - return NULL; - - bo_fake->bo.size = size; - bo_fake->bo.offset = offset; - bo_fake->bo.virtual = virtual; - bo_fake->bo.bufmgr = bufmgr; - bo_fake->refcount = 1; - bo_fake->id = ++bufmgr_fake->buf_nr; - bo_fake->name = name; - bo_fake->flags = BM_PINNED | DRM_BO_FLAG_NO_MOVE; - bo_fake->is_static = GL_TRUE; - - DBG("drm_bo_alloc_static: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - return &bo_fake->bo; -} - -static void -dri_fake_bo_reference(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - bo_fake->refcount++; -} - -static void -dri_fake_bo_unreference(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i; - - if (!bo) - return; - - if (--bo_fake->refcount == 0) { - assert(bo_fake->map_count == 0); - /* No remaining references, so free it */ - if (bo_fake->block) - free_block(bufmgr_fake, bo_fake->block); - free_backing_store(bo); - - for (i = 0; i < bo_fake->nr_relocs; i++) - dri_bo_unreference(bo_fake->relocs[i].target_buf); - - DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id, bo_fake->name); - - free(bo_fake->relocs); - free(bo); - - return; - } -} - -/** - * Set the buffer as not requiring backing store, and instead get the callback - * invoked whenever it would be set dirty. - */ -void dri_bo_fake_disable_backing_store(dri_bo *bo, - void (*invalidate_cb)(dri_bo *bo, - void *ptr), - void *ptr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - if (bo_fake->backing_store) - free_backing_store(bo); - - bo_fake->flags |= BM_NO_BACKING_STORE; - - DBG("disable_backing_store set buf %d dirty\n", bo_fake->id); - bo_fake->dirty = 1; - bo_fake->invalidate_cb = invalidate_cb; - bo_fake->invalidate_ptr = ptr; - - /* Note that it is invalid right from the start. Also note - * invalidate_cb is called with the bufmgr locked, so cannot - * itself make bufmgr calls. - */ - if (invalidate_cb != NULL) - invalidate_cb(bo, ptr); -} - -/** - * Map a buffer into bo->virtual, allocating either card memory space (If - * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary. - */ -static int -dri_fake_bo_map(dri_bo *bo, GLboolean write_enable) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - /* Static buffers are always mapped. */ - if (bo_fake->is_static) - return 0; - - /* Allow recursive mapping. Mesa may recursively map buffers with - * nested display loops, and it is used internally in bufmgr_fake - * for relocation. - */ - if (bo_fake->map_count++ != 0) - return 0; - - { - DBG("drm_bo_map: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - if (bo->virtual != NULL) { - _mesa_printf("%s: already mapped\n", __FUNCTION__); - abort(); - } - else if (bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED)) { - - if (!bo_fake->block && !evict_and_alloc_block(bo)) { - DBG("%s: alloc failed\n", __FUNCTION__); - bufmgr_fake->fail = 1; - return 1; - } - else { - assert(bo_fake->block); - bo_fake->dirty = 0; - - if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) && - bo_fake->block->fenced) { - dri_fake_bo_wait_rendering(bo); - } - - bo->virtual = bo_fake->block->virtual; - } - } - else { - if (write_enable) - set_dirty(bo); - - if (bo_fake->backing_store == 0) - alloc_backing_store(bo); - - bo->virtual = bo_fake->backing_store; - } - } - - return 0; -} - -static int -dri_fake_bo_unmap(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - /* Static buffers are always mapped. */ - if (bo_fake->is_static) - return 0; - - assert(bo_fake->map_count != 0); - if (--bo_fake->map_count != 0) - return 0; - - DBG("drm_bo_unmap: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - bo->virtual = NULL; - - return 0; -} - -static void -dri_fake_kick_all(dri_bufmgr_fake *bufmgr_fake) -{ - struct block *block, *tmp; - - bufmgr_fake->performed_rendering = GL_FALSE; - /* okay for ever BO that is on the HW kick it off. - seriously not afraid of the POLICE right now */ - foreach_s(block, tmp, &bufmgr_fake->on_hardware) { - dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; - - block->on_hardware = 0; - free_block(bufmgr_fake, block); - bo_fake->block = NULL; - bo_fake->validated = GL_FALSE; - if (!(bo_fake->flags & BM_NO_BACKING_STORE)) - bo_fake->dirty = 1; - } -} - -static int -dri_fake_bo_validate(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - - /* XXX: Sanity-check whether we've already validated this one under - * different flags. See drmAddValidateItem(). - */ - bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - - DBG("drm_bo_validate: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, - bo_fake->bo.size / 1024); - - /* Sanity check: Buffers should be unmapped before being validated. - * This is not so much of a problem for bufmgr_fake, but TTM refuses, - * and the problem is harder to debug there. - */ - assert(bo_fake->map_count == 0); - - if (bo_fake->is_static) { - /* Add it to the needs-fence list */ - bufmgr_fake->need_fence = 1; - return 0; - } - - /* reset size accounted */ - bo_fake->size_accounted = 0; - - /* Allocate the card memory */ - if (!bo_fake->block && !evict_and_alloc_block(bo)) { - bufmgr_fake->fail = 1; - DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name); - return -1; - } - - assert(bo_fake->block); - assert(bo_fake->block->bo == &bo_fake->bo); - - bo->offset = bo_fake->block->mem->ofs; - - /* Upload the buffer contents if necessary */ - if (bo_fake->dirty) { - DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id, - bo_fake->name, bo->size, bo_fake->block->mem->ofs); - - assert(!(bo_fake->flags & - (BM_NO_BACKING_STORE|BM_PINNED))); - - /* Actually, should be able to just wait for a fence on the memory, - * which we would be tracking when we free it. Waiting for idle is - * a sufficiently large hammer for now. - */ - dri_bufmgr_fake_wait_idle(bufmgr_fake); - - /* we may never have mapped this BO so it might not have any backing - * store if this happens it should be rare, but 0 the card memory - * in any case */ - if (bo_fake->backing_store) - memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size); - else - memset(bo_fake->block->virtual, 0, bo->size); - - bo_fake->dirty = 0; - } - - bo_fake->block->fenced = 0; - bo_fake->block->on_hardware = 1; - move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block); - - bo_fake->validated = GL_TRUE; - bufmgr_fake->need_fence = 1; - - return 0; -} - -static void -dri_fake_fence_validated(dri_bufmgr *bufmgr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - unsigned int cookie; - - cookie = _fence_emit_internal(bufmgr_fake); - fence_blocks(bufmgr_fake, cookie); - - DBG("drm_fence_validated: 0x%08x cookie\n", cookie); -} - -static void -dri_fake_destroy(dri_bufmgr *bufmgr) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; - - mmDestroy(bufmgr_fake->heap); - free(bufmgr); -} - -static int -dri_fake_emit_reloc(dri_bo *reloc_buf, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta, uint32_t offset, dri_bo *target_buf) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)reloc_buf->bufmgr; - struct fake_buffer_reloc *r; - dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf; - dri_bo_fake *target_fake = (dri_bo_fake *)target_buf; - int i; - - assert(reloc_buf); - assert(target_buf); - - assert(target_fake->is_static || target_fake->size_accounted); - - if (reloc_fake->relocs == NULL) { - reloc_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * - MAX_RELOCS); - } - - r = &reloc_fake->relocs[reloc_fake->nr_relocs++]; - - assert(reloc_fake->nr_relocs <= MAX_RELOCS); - - dri_bo_reference(target_buf); - - r->target_buf = target_buf; - r->offset = offset; - r->last_target_offset = target_buf->offset; - r->delta = delta; - r->read_domains = read_domains; - r->write_domain = write_domain; - - if (bufmgr_fake->debug) { - /* Check that a conflicting relocation hasn't already been emitted. */ - for (i = 0; i < reloc_fake->nr_relocs - 1; i++) { - struct fake_buffer_reloc *r2 = &reloc_fake->relocs[i]; - - assert(r->offset != r2->offset); - } - } - - return 0; -} - -/** - * Incorporates the validation flags associated with each relocation into - * the combined validation flags for the buffer on this batchbuffer submission. - */ -static void -dri_fake_calculate_domains(dri_bo *bo) -{ - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i; - - for (i = 0; i < bo_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bo_fake->relocs[i]; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - - /* Do the same for the tree of buffers we depend on */ - dri_fake_calculate_domains(r->target_buf); - - target_fake->read_domains |= r->read_domains; - if (target_fake->write_domain != 0) - target_fake->write_domain = r->write_domain; - } -} - - -static int -dri_fake_reloc_and_validate_buffer(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i, ret; - - assert(bo_fake->map_count == 0); - - for (i = 0; i < bo_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bo_fake->relocs[i]; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - uint32_t reloc_data; - - /* Validate the target buffer if that hasn't been done. */ - if (!target_fake->validated) { - ret = dri_fake_reloc_and_validate_buffer(r->target_buf); - if (ret != 0) { - if (bo->virtual != NULL) - dri_bo_unmap(bo); - return ret; - } - } - - /* Calculate the value of the relocation entry. */ - if (r->target_buf->offset != r->last_target_offset) { - reloc_data = r->target_buf->offset + r->delta; - - if (bo->virtual == NULL) - dri_bo_map(bo, GL_TRUE); - - *(uint32_t *)(bo->virtual + r->offset) = reloc_data; - - r->last_target_offset = r->target_buf->offset; - } - } - - if (bo->virtual != NULL) - dri_bo_unmap(bo); - - if (bo_fake->write_domain != 0) { - if (!(bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED))) { - if (bo_fake->backing_store == 0) - alloc_backing_store(bo); - - bo_fake->card_dirty = 1; - } - bufmgr_fake->performed_rendering = GL_TRUE; - } - - return dri_fake_bo_validate(bo); -} - -static void * -dri_fake_process_relocs(dri_bo *batch_buf) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; - dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; - int ret; - int retry_count = 0; - - bufmgr_fake->performed_rendering = GL_FALSE; - - dri_fake_calculate_domains(batch_buf); - - batch_fake->read_domains = DRM_GEM_DOMAIN_I915_COMMAND; - - /* we've ran out of RAM so blow the whole lot away and retry */ - restart: - ret = dri_fake_reloc_and_validate_buffer(batch_buf); - if (bufmgr_fake->fail == 1) { - if (retry_count == 0) { - retry_count++; - dri_fake_kick_all(bufmgr_fake); - bufmgr_fake->fail = 0; - goto restart; - } else /* dump out the memory here */ - mmDumpMemInfo(bufmgr_fake->heap); - } - - assert(ret == 0); - - bufmgr_fake->current_total_size = 0; - return NULL; -} - -static void -dri_bo_fake_post_submit(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - int i; - - for (i = 0; i < bo_fake->nr_relocs; i++) { - struct fake_buffer_reloc *r = &bo_fake->relocs[i]; - dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; - - if (target_fake->validated) - dri_bo_fake_post_submit(r->target_buf); - - DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", - bo_fake->name, (uint32_t)bo->offset, r->offset, - target_fake->name, (uint32_t)r->target_buf->offset, r->delta); - } - - assert(bo_fake->map_count == 0); - bo_fake->validated = GL_FALSE; - bo_fake->read_domains = 0; - bo_fake->write_domain = 0; -} - - -static void -dri_fake_post_submit(dri_bo *batch_buf) -{ - dri_fake_fence_validated(batch_buf->bufmgr); - - dri_bo_fake_post_submit(batch_buf); -} - -static int -dri_fake_check_aperture_space(dri_bo *bo) -{ - dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; - dri_bo_fake *bo_fake = (dri_bo_fake *)bo; - GLuint sz; - - sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); - - if (bo_fake->size_accounted || bo_fake->is_static) - return 0; - - if (bufmgr_fake->current_total_size + sz > bufmgr_fake->size) { - DBG("check_space: %s bo %d %d overflowed bufmgr size %d\n", bo_fake->name, bo_fake->id, sz, bufmgr_fake->size); - return -1; - } - - bufmgr_fake->current_total_size += sz; - bo_fake->size_accounted = 1; - DBG("drm_check_space: buf %d, %s %d %d\n", bo_fake->id, bo_fake->name, bo->size, bufmgr_fake->current_total_size); - return 0; -} - -dri_bufmgr * -dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, - unsigned long size, - unsigned int (*fence_emit)(void *private), - int (*fence_wait)(void *private, unsigned int cookie), - void *driver_priv) -{ - dri_bufmgr_fake *bufmgr_fake; - - bufmgr_fake = calloc(1, sizeof(*bufmgr_fake)); - - /* Initialize allocator */ - make_empty_list(&bufmgr_fake->fenced); - make_empty_list(&bufmgr_fake->on_hardware); - make_empty_list(&bufmgr_fake->lru); - - bufmgr_fake->low_offset = low_offset; - bufmgr_fake->virtual = low_virtual; - bufmgr_fake->size = size; - bufmgr_fake->heap = mmInit(low_offset, size); - - /* Hook in methods */ - bufmgr_fake->bufmgr.bo_alloc = dri_fake_bo_alloc; - bufmgr_fake->bufmgr.bo_alloc_static = dri_fake_bo_alloc_static; - bufmgr_fake->bufmgr.bo_reference = dri_fake_bo_reference; - bufmgr_fake->bufmgr.bo_unreference = dri_fake_bo_unreference; - bufmgr_fake->bufmgr.bo_map = dri_fake_bo_map; - bufmgr_fake->bufmgr.bo_unmap = dri_fake_bo_unmap; - bufmgr_fake->bufmgr.bo_wait_rendering = dri_fake_bo_wait_rendering; - bufmgr_fake->bufmgr.destroy = dri_fake_destroy; - bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; - bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; - bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit; - bufmgr_fake->bufmgr.check_aperture_space = dri_fake_check_aperture_space; - bufmgr_fake->bufmgr.debug = GL_FALSE; - - bufmgr_fake->fence_emit = fence_emit; - bufmgr_fake->fence_wait = fence_wait; - bufmgr_fake->driver_priv = driver_priv; - - return &bufmgr_fake->bufmgr; -} - diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.h b/src/mesa/drivers/dri/intel/intel_bufmgr_fake.h deleted file mode 100644 index bc7e59e61d..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_fake.h +++ /dev/null @@ -1,50 +0,0 @@ -/************************************************************************** - * - * Copyright © 2007 Intel Corporation - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström - * Keith Whitwell - * Eric Anholt - */ - -#ifndef _INTEL_BUFMGR_FAKE_H_ -#define _INTEL_BUFMGR_FAKE_H_ - -void dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr); -dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, - unsigned long size, - unsigned int (*fence_emit)(void *private), - int (*fence_wait)(void *private, - unsigned int cookie), - void *driver_priv); -void dri_bo_fake_disable_backing_store(dri_bo *bo, - void (*invalidate_cb)(dri_bo *bo, - void *ptr), - void *ptr); -#endif /* _INTEL_BUFMGR_FAKE_H_ */ - diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c deleted file mode 100644 index 3c1c3157e1..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.c +++ /dev/null @@ -1,847 +0,0 @@ -/************************************************************************** - * - * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström - * Keith Whitwell - * Eric Anholt - * Dave Airlie - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "errno.h" -#include "mtypes.h" -#include "dri_bufmgr.h" -#include "string.h" -#include "imports.h" - -#include "i915_drm.h" - -#include "intel_bufmgr_gem.h" - -#define DBG(...) do { \ - if (bufmgr_gem->bufmgr.debug) \ - fprintf(stderr, __VA_ARGS__); \ -} while (0) - -struct intel_validate_entry { - dri_bo *bo; - struct drm_i915_op_arg bo_arg; -}; - -struct dri_gem_bo_bucket_entry { - uint32_t gem_handle; - uint32_t last_offset; - struct dri_gem_bo_bucket_entry *next; -}; - -struct dri_gem_bo_bucket { - struct dri_gem_bo_bucket_entry *head; - struct dri_gem_bo_bucket_entry **tail; - /** - * Limit on the number of entries in this bucket. - * - * 0 means that this caching at this bucket size is disabled. - * -1 means that there is no limit to caching at this size. - */ - int max_entries; - int num_entries; -}; - -/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse - * is 1 << 16 pages, or 256MB. - */ -#define INTEL_GEM_BO_BUCKETS 16 -typedef struct _dri_bufmgr_gem { - dri_bufmgr bufmgr; - - int fd; - - uint32_t max_relocs; - - struct drm_i915_gem_exec_object *exec_objects; - dri_bo **exec_bos; - int exec_size; - int exec_count; - - /** Array of lists of cached gem objects of power-of-two sizes */ - struct dri_gem_bo_bucket cache_bucket[INTEL_GEM_BO_BUCKETS]; - - struct drm_i915_gem_execbuffer exec_arg; -} dri_bufmgr_gem; - -typedef struct _dri_bo_gem { - dri_bo bo; - - int refcount; - GLboolean mapped; - uint32_t gem_handle; - const char *name; - - /** - * Index of the buffer within the validation list while preparing a - * batchbuffer execution. - */ - int validate_index; - - /** - * Tracks whether set_domain to CPU is current - * Set when set_domain has been called - * Cleared when a batch has been submitted - */ - GLboolean cpu_domain_set; - - /** Array passed to the DRM containing relocation information. */ - struct drm_i915_gem_relocation_entry *relocs; - /** Array of bos corresponding to relocs[i].target_handle */ - dri_bo **reloc_target_bo; - /** Number of entries in relocs */ - int reloc_count; - /** Mapped address for the buffer */ - void *virtual; -} dri_bo_gem; - -static int -logbase2(int n) -{ - GLint i = 1; - GLint log2 = 0; - - while (n > i) { - i *= 2; - log2++; - } - - return log2; -} - -static struct dri_gem_bo_bucket * -dri_gem_bo_bucket_for_size(dri_bufmgr_gem *bufmgr_gem, unsigned long size) -{ - int i; - - /* We only do buckets in power of two increments */ - if ((size & (size - 1)) != 0) - return NULL; - - /* We should only see sizes rounded to pages. */ - assert((size % 4096) == 0); - - /* We always allocate in units of pages */ - i = ffs(size / 4096) - 1; - if (i >= INTEL_GEM_BO_BUCKETS) - return NULL; - - return &bufmgr_gem->cache_bucket[i]; -} - - -static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem) -{ - int i, j; - - for (i = 0; i < bufmgr_gem->exec_count; i++) { - dri_bo *bo = bufmgr_gem->exec_bos[i]; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - - if (bo_gem->relocs == NULL) { - DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, bo_gem->name); - continue; - } - - for (j = 0; j < bo_gem->reloc_count; j++) { - dri_bo *target_bo = bo_gem->reloc_target_bo[j]; - dri_bo_gem *target_gem = (dri_bo_gem *)target_bo; - - DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n", - i, - bo_gem->gem_handle, bo_gem->name, bo_gem->relocs[j].offset, - target_gem->gem_handle, target_gem->name, target_bo->offset, - bo_gem->relocs[j].delta); - } - } -} - -/** - * Adds the given buffer to the list of buffers to be validated (moved into the - * appropriate memory type) with the next batch submission. - * - * If a buffer is validated multiple times in a batch submission, it ends up - * with the intersection of the memory type flags and the union of the - * access flags. - */ -static void -intel_add_validate_buffer(dri_bo *bo) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - int index; - - if (bo_gem->validate_index != -1) - return; - - /* Extend the array of validation entries as necessary. */ - if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { - int new_size = bufmgr_gem->exec_size * 2; - - if (new_size == 0) - new_size = 5; - - bufmgr_gem->exec_objects = - realloc(bufmgr_gem->exec_objects, - sizeof(*bufmgr_gem->exec_objects) * new_size); - bufmgr_gem->exec_bos = - realloc(bufmgr_gem->exec_bos, - sizeof(*bufmgr_gem->exec_bos) * new_size); - bufmgr_gem->exec_size = new_size; - } - - index = bufmgr_gem->exec_count; - bo_gem->validate_index = index; - /* Fill in array entry */ - bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; - bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; - bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; - bufmgr_gem->exec_objects[index].alignment = 0; - bufmgr_gem->exec_objects[index].offset = 0; - bufmgr_gem->exec_bos[index] = bo; - dri_bo_reference(bo); - bufmgr_gem->exec_count++; -} - - -#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ - sizeof(uint32_t)) - -static int -intel_setup_reloc_list(dri_bo *bo) -{ - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - - bo_gem->relocs = malloc(bufmgr_gem->max_relocs * - sizeof(struct drm_i915_gem_relocation_entry)); - bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs * sizeof(dri_bo *)); - - return 0; -} - -static dri_bo * -dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; - dri_bo_gem *bo_gem; - unsigned int page_size = getpagesize(); - int ret; - struct dri_gem_bo_bucket *bucket; - GLboolean alloc_from_cache = GL_FALSE; - - bo_gem = calloc(1, sizeof(*bo_gem)); - if (!bo_gem) - return NULL; - - /* Round the allocated size up to a power of two number of pages. */ - bo_gem->bo.size = 1 << logbase2(size); - if (bo_gem->bo.size < page_size) - bo_gem->bo.size = page_size; - bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo_gem->bo.size); - - /* If we don't have caching at this size, don't actually round the - * allocation up. - */ - if (bucket == NULL || bucket->max_entries == 0) { - bo_gem->bo.size = size; - if (bo_gem->bo.size < page_size) - bo_gem->bo.size = page_size; - } - - /* Get a buffer out of the cache if available */ - if (bucket != NULL && bucket->num_entries > 0) { - struct dri_gem_bo_bucket_entry *entry = bucket->head; - struct drm_i915_gem_busy busy; - - busy.handle = entry->gem_handle; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); - alloc_from_cache = (ret == 0 && busy.busy == 0); - - if (alloc_from_cache) { - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - bo_gem->gem_handle = entry->gem_handle; - bo_gem->bo.offset = entry->last_offset; - free(entry); - } - } - - if (!alloc_from_cache) { - struct drm_gem_create create; - - memset(&create, 0, sizeof(create)); - create.size = bo_gem->bo.size; - - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CREATE, &create); - bo_gem->gem_handle = create.handle; - if (ret != 0) { - free(bo_gem); - return NULL; - } - } - - bo_gem->bo.virtual = NULL; - bo_gem->bo.bufmgr = bufmgr; - bo_gem->name = name; - bo_gem->refcount = 1; - bo_gem->validate_index = -1; - - DBG("bo_create: buf %d (%s) %ldb\n", - bo_gem->gem_handle, bo_gem->name, size); - - return &bo_gem->bo; -} - -/* Our GEM backend doesn't allow creation of static buffers, as that requires - * privelege for the non-fake case, and the lock in the fake case where we were - * working around the X Server not creating buffers and passing handles to us. - */ -static dri_bo * -dri_gem_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, void *virtual, - uint64_t location_mask) -{ - return NULL; -} - -/** - * Returns a dri_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -dri_bo * -intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; - dri_bo_gem *bo_gem; - int ret; - struct drm_gem_open open_arg; - - bo_gem = calloc(1, sizeof(*bo_gem)); - if (!bo_gem) - return NULL; - - memset(&open_arg, 0, sizeof(open_arg)); - open_arg.name = handle; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg); - if (ret != 0) { - fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", - name, handle, strerror(-ret)); - free(bo_gem); - return NULL; - } - bo_gem->bo.size = open_arg.size; - bo_gem->bo.offset = 0; - bo_gem->bo.virtual = NULL; - bo_gem->bo.bufmgr = bufmgr; - bo_gem->name = name; - bo_gem->refcount = 1; - bo_gem->validate_index = -1; - bo_gem->gem_handle = open_arg.handle; - - DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); - - return &bo_gem->bo; -} - -static void -dri_gem_bo_reference(dri_bo *bo) -{ - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - - bo_gem->refcount++; -} - -static void -dri_gem_bo_unreference(dri_bo *bo) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - - if (!bo) - return; - - if (--bo_gem->refcount == 0) { - struct dri_gem_bo_bucket *bucket; - int ret; - - if (bo_gem->mapped) - munmap (bo_gem->virtual, bo->size); - - if (bo_gem->relocs != NULL) { - int i; - - /* Unreference all the target buffers */ - for (i = 0; i < bo_gem->reloc_count; i++) - dri_bo_unreference(bo_gem->reloc_target_bo[i]); - free(bo_gem->reloc_target_bo); - free(bo_gem->relocs); - } - - bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo->size); - /* Put the buffer into our internal cache for reuse if we can. */ - if (bucket != NULL && - (bucket->max_entries == -1 || - (bucket->max_entries > 0 && - bucket->num_entries < bucket->max_entries))) - { - struct dri_gem_bo_bucket_entry *entry; - - entry = calloc(1, sizeof(*entry)); - entry->gem_handle = bo_gem->gem_handle; - entry->last_offset = bo->offset; - - entry->next = NULL; - *bucket->tail = entry; - bucket->tail = &entry->next; - bucket->num_entries++; - } else { - struct drm_gem_close close; - - /* Close this object */ - close.handle = bo_gem->gem_handle; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); - if (ret != 0) { - fprintf(stderr, - "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", - bo_gem->gem_handle, bo_gem->name, strerror(-ret)); - } - } - - DBG("bo_unreference final: %d (%s)\n", - bo_gem->gem_handle, bo_gem->name); - - free(bo); - return; - } -} - -static int -dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) -{ - dri_bufmgr_gem *bufmgr_gem; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - struct drm_gem_set_domain set_domain; - int ret; - - bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - - /* Allow recursive mapping. Mesa may recursively map buffers with - * nested display loops. - */ - if (!bo_gem->mapped) { - - assert(bo->virtual == NULL); - - DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); - - if (bo_gem->virtual == NULL) { - struct drm_gem_mmap mmap_arg; - - memset(&mmap_arg, 0, sizeof(mmap_arg)); - mmap_arg.handle = bo_gem->gem_handle; - mmap_arg.offset = 0; - mmap_arg.size = bo->size; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", - __FILE__, __LINE__, - bo_gem->gem_handle, bo_gem->name, strerror(errno)); - } - bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; - } - bo->virtual = bo_gem->virtual; - bo_gem->mapped = GL_TRUE; - DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->virtual); - } - - if (!bo_gem->cpu_domain_set) { - set_domain.handle = bo_gem->gem_handle; - set_domain.read_domains = DRM_GEM_DOMAIN_CPU; - set_domain.write_domain = write_enable ? DRM_GEM_DOMAIN_CPU : 0; - ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); - if (ret != 0) { - fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", - __FILE__, __LINE__, - bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, - strerror (errno)); - } - bo_gem->cpu_domain_set = GL_TRUE; - } - - return 0; -} - -static int -dri_gem_bo_unmap(dri_bo *bo) -{ - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - - if (bo == NULL) - return 0; - - assert(bo_gem->mapped); - - return 0; -} - -static int -dri_gem_bo_subdata (dri_bo *bo, unsigned long offset, - unsigned long size, const void *data) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - struct drm_gem_pwrite pwrite; - int ret; - - memset (&pwrite, 0, sizeof (pwrite)); - pwrite.handle = bo_gem->gem_handle; - pwrite.offset = offset; - pwrite.size = size; - pwrite.data_ptr = (uint64_t) (uintptr_t) data; - ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_PWRITE, &pwrite); - if (ret != 0) { - fprintf (stderr, "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", - __FILE__, __LINE__, - bo_gem->gem_handle, (int) offset, (int) size, - strerror (errno)); - } - return 0; -} - -static int -dri_gem_bo_get_subdata (dri_bo *bo, unsigned long offset, - unsigned long size, void *data) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - struct drm_gem_pread pread; - int ret; - - memset (&pread, 0, sizeof (pread)); - pread.handle = bo_gem->gem_handle; - pread.offset = offset; - pread.size = size; - pread.data_ptr = (uint64_t) (uintptr_t) data; - ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_PREAD, &pread); - if (ret != 0) { - fprintf (stderr, "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", - __FILE__, __LINE__, - bo_gem->gem_handle, (int) offset, (int) size, - strerror (errno)); - } - return 0; -} - -static void -dri_gem_bo_wait_rendering(dri_bo *bo) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - struct drm_gem_set_domain set_domain; - int ret; - - set_domain.handle = bo_gem->gem_handle; - set_domain.read_domains = DRM_GEM_DOMAIN_CPU; - set_domain.write_domain = 0; - ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); - if (ret != 0) { - fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", - __FILE__, __LINE__, - bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, - strerror (errno)); - } -} - -static void -dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; - int i; - - free(bufmgr_gem->exec_objects); - free(bufmgr_gem->exec_bos); - - /* Free any cached buffer objects we were going to reuse */ - for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { - struct dri_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i]; - struct dri_gem_bo_bucket_entry *entry; - - while ((entry = bucket->head) != NULL) { - struct drm_gem_close close; - int ret; - - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - /* Close this object */ - close.handle = entry->gem_handle; - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); - if (ret != 0) { - fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %s\n", - strerror(-ret)); - } - - free(entry); - } - } - - free(bufmgr); -} - -/** - * Adds the target buffer to the validation list and adds the relocation - * to the reloc_buffer's relocation list. - * - * The relocation entry at the given offset must already contain the - * precomputed relocation value, because the kernel will optimize out - * the relocation entry write when the buffer hasn't moved from the - * last known offset in target_bo. - */ -static int -dri_gem_emit_reloc(dri_bo *bo, uint32_t read_domains, uint32_t write_domain, - uint32_t delta, uint32_t offset, dri_bo *target_bo) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - dri_bo_gem *target_bo_gem = (dri_bo_gem *)target_bo; - - /* Create a new relocation list if needed */ - if (bo_gem->relocs == NULL) - intel_setup_reloc_list(bo); - - /* Check overflow */ - assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); - - /* Check args */ - assert (offset <= bo->size - 4); - assert ((write_domain & (write_domain-1)) == 0); - - bo_gem->relocs[bo_gem->reloc_count].offset = offset; - bo_gem->relocs[bo_gem->reloc_count].delta = delta; - bo_gem->relocs[bo_gem->reloc_count].target_handle = - target_bo_gem->gem_handle; - bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; - bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; - bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; - - bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; - dri_bo_reference(target_bo); - - bo_gem->reloc_count++; - return 0; -} - -/** - * Walk the tree of relocations rooted at BO and accumulate the list of - * validations to be performed and update the relocation buffers with - * index values into the validation list. - */ -static void -dri_gem_bo_process_reloc(dri_bo *bo) -{ - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - int i; - - if (bo_gem->relocs == NULL) - return; - - for (i = 0; i < bo_gem->reloc_count; i++) { - dri_bo *target_bo = bo_gem->reloc_target_bo[i]; - - /* Continue walking the tree depth-first. */ - dri_gem_bo_process_reloc(target_bo); - - /* Add the target to the validate list */ - intel_add_validate_buffer(target_bo); - } -} - -static void * -dri_gem_process_reloc(dri_bo *batch_buf) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *) batch_buf->bufmgr; - - /* Update indices and set up the validate list. */ - dri_gem_bo_process_reloc(batch_buf); - - /* Add the batch buffer to the validation list. There are no relocations - * pointing to it. - */ - intel_add_validate_buffer(batch_buf); - - bufmgr_gem->exec_arg.buffers_ptr = (uintptr_t)bufmgr_gem->exec_objects; - bufmgr_gem->exec_arg.buffer_count = bufmgr_gem->exec_count; - bufmgr_gem->exec_arg.batch_start_offset = 0; - bufmgr_gem->exec_arg.batch_len = 0; /* written in intel_exec_ioctl */ - - return &bufmgr_gem->exec_arg; -} - -static void -intel_update_buffer_offsets (dri_bufmgr_gem *bufmgr_gem) -{ - int i; - - for (i = 0; i < bufmgr_gem->exec_count; i++) { - dri_bo *bo = bufmgr_gem->exec_bos[i]; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - - /* Update the buffer offset */ - if (bufmgr_gem->exec_objects[i].offset != bo->offset) { - DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->gem_handle, bo_gem->name, bo->offset, - bufmgr_gem->exec_objects[i].offset); - bo->offset = bufmgr_gem->exec_objects[i].offset; - } - } -} - -static void -dri_gem_post_submit(dri_bo *batch_buf) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr; - int i; - - intel_update_buffer_offsets (bufmgr_gem); - - if (bufmgr_gem->bufmgr.debug) - dri_gem_dump_validation_list(bufmgr_gem); - - for (i = 0; i < bufmgr_gem->exec_count; i++) { - dri_bo *bo = bufmgr_gem->exec_bos[i]; - dri_bo_gem *bo_gem = (dri_bo_gem *)bo; - - /* Need to call set_domain on next bo_map */ - bo_gem->cpu_domain_set = GL_FALSE; - - /* Disconnect the buffer from the validate list */ - bo_gem->validate_index = -1; - dri_bo_unreference(bo); - bufmgr_gem->exec_bos[i] = NULL; - } - bufmgr_gem->exec_count = 0; -} - -/** - * Enables unlimited caching of buffer objects for reuse. - * - * This is potentially very memory expensive, as the cache at each bucket - * size is only bounded by how many buffers of that size we've managed to have - * in flight at once. - */ -void -intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr) -{ - dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; - int i; - - for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { - bufmgr_gem->cache_bucket[i].max_entries = -1; - } -} - -/* - * - */ -static int -dri_gem_check_aperture_space(dri_bo *bo) -{ - return 0; -} - -/** - * Initializes the GEM buffer manager, which uses the kernel to allocate, map, - * and manage map buffer objections. - * - * \param fd File descriptor of the opened DRM device. - */ -dri_bufmgr * -intel_bufmgr_gem_init(int fd, int batch_size) -{ - dri_bufmgr_gem *bufmgr_gem; - int i; - - bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); - bufmgr_gem->fd = fd; - - /* Let's go with one relocation per every 2 dwords (but round down a bit - * since a power of two will mean an extra page allocation for the reloc - * buffer). - * - * Every 4 was too few for the blender benchmark. - */ - bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; - - bufmgr_gem->bufmgr.bo_alloc = dri_gem_bo_alloc; - bufmgr_gem->bufmgr.bo_alloc_static = dri_gem_bo_alloc_static; - bufmgr_gem->bufmgr.bo_reference = dri_gem_bo_reference; - bufmgr_gem->bufmgr.bo_unreference = dri_gem_bo_unreference; - bufmgr_gem->bufmgr.bo_map = dri_gem_bo_map; - bufmgr_gem->bufmgr.bo_unmap = dri_gem_bo_unmap; - bufmgr_gem->bufmgr.bo_subdata = dri_gem_bo_subdata; - bufmgr_gem->bufmgr.bo_get_subdata = dri_gem_bo_get_subdata; - bufmgr_gem->bufmgr.bo_wait_rendering = dri_gem_bo_wait_rendering; - bufmgr_gem->bufmgr.destroy = dri_bufmgr_gem_destroy; - bufmgr_gem->bufmgr.emit_reloc = dri_gem_emit_reloc; - bufmgr_gem->bufmgr.process_relocs = dri_gem_process_reloc; - bufmgr_gem->bufmgr.post_submit = dri_gem_post_submit; - bufmgr_gem->bufmgr.debug = GL_FALSE; - bufmgr_gem->bufmgr.check_aperture_space = dri_gem_check_aperture_space; - /* Initialize the linked lists for BO reuse cache. */ - for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) - bufmgr_gem->cache_bucket[i].tail = &bufmgr_gem->cache_bucket[i].head; - - return &bufmgr_gem->bufmgr; -} - diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h b/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h deleted file mode 100644 index 36caeba214..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_gem.h +++ /dev/null @@ -1,16 +0,0 @@ - -#ifndef INTEL_BUFMGR_GEM_H -#define INTEL_BUFMGR_GEM_H - -#include "dri_bufmgr.h" - -extern dri_bo *intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, - const char *name, - unsigned int handle); - -dri_bufmgr *intel_bufmgr_gem_init(int fd, int batch_size); - -void -intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr); - -#endif /* INTEL_BUFMGR_GEM_H */ diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c deleted file mode 100644 index 545913fa31..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c +++ /dev/null @@ -1,1102 +0,0 @@ -/************************************************************************** - * - * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström - * Keith Whitwell - * Eric Anholt - * Dave Airlie - */ - -#include -#include -#include -#include -#include -#include - -#include "errno.h" -#include "mtypes.h" -#include "dri_bufmgr.h" -#include "string.h" -#include "imports.h" - -#include "i915_drm.h" - -#include "intel_bufmgr_ttm.h" - -#define DBG(...) do { \ - if (bufmgr_ttm->bufmgr.debug) \ - fprintf(stderr, __VA_ARGS__); \ -} while (0) - -/* - * These bits are always specified in each validation - * request. Other bits are not supported at this point - * as it would require a bit of investigation to figure - * out what mask value should be used. - */ -#define INTEL_BO_MASK (DRM_BO_MASK_MEM | \ - DRM_BO_FLAG_READ | \ - DRM_BO_FLAG_WRITE | \ - DRM_BO_FLAG_EXE) - -struct intel_validate_entry { - dri_bo *bo; - struct drm_i915_op_arg bo_arg; -}; - -struct dri_ttm_bo_bucket_entry { - drmBO drm_bo; - struct dri_ttm_bo_bucket_entry *next; -}; - -struct dri_ttm_bo_bucket { - struct dri_ttm_bo_bucket_entry *head; - struct dri_ttm_bo_bucket_entry **tail; - /** - * Limit on the number of entries in this bucket. - * - * 0 means that this caching at this bucket size is disabled. - * -1 means that there is no limit to caching at this size. - */ - int max_entries; - int num_entries; -}; - -/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse - * is 1 << 16 pages, or 256MB. - */ -#define INTEL_TTM_BO_BUCKETS 16 -typedef struct _dri_bufmgr_ttm { - dri_bufmgr bufmgr; - - int fd; - unsigned int fence_type; - unsigned int fence_type_flush; - - uint32_t max_relocs; - - struct intel_validate_entry *validate_array; - int validate_array_size; - int validate_count; - - /** Array of lists of cached drmBOs of power-of-two sizes */ - struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS]; -} dri_bufmgr_ttm; - -/** - * Private information associated with a relocation that isn't already stored - * in the relocation buffer to be passed to the kernel. - */ -struct dri_ttm_reloc { - dri_bo *target_buf; - uint64_t validate_flags; - /** Offset of target_buf after last execution of this relocation entry. */ - unsigned int last_target_offset; -}; - -typedef struct _dri_bo_ttm { - dri_bo bo; - - int refcount; - unsigned int map_count; - drmBO drm_bo; - const char *name; - - uint64_t last_flags; - - /** - * Index of the buffer within the validation list while preparing a - * batchbuffer execution. - */ - int validate_index; - - /** DRM buffer object containing relocation list */ - uint32_t *reloc_buf_data; - struct dri_ttm_reloc *relocs; - - /** - * Indicates that the buffer may be shared with other processes, so we - * can't hold maps beyond when the user does. - */ - GLboolean shared; - - GLboolean delayed_unmap; - /* Virtual address from the dri_bo_map whose unmap was delayed. */ - void *saved_virtual; -} dri_bo_ttm; - -typedef struct _dri_fence_ttm -{ - dri_fence fence; - - int refcount; - const char *name; - drmFence drm_fence; -} dri_fence_ttm; - -static int -logbase2(int n) -{ - GLint i = 1; - GLint log2 = 0; - - while (n > i) { - i *= 2; - log2++; - } - - return log2; -} - -static struct dri_ttm_bo_bucket * -dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size) -{ - int i; - - /* We only do buckets in power of two increments */ - if ((size & (size - 1)) != 0) - return NULL; - - /* We should only see sizes rounded to pages. */ - assert((size % 4096) == 0); - - /* We always allocate in units of pages */ - i = ffs(size / 4096) - 1; - if (i >= INTEL_TTM_BO_BUCKETS) - return NULL; - - return &bufmgr_ttm->cache_bucket[i]; -} - - -static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm) -{ - int i, j; - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - - if (bo_ttm->reloc_buf_data != NULL) { - for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) { - uint32_t *reloc_entry = bo_ttm->reloc_buf_data + - I915_RELOC_HEADER + - j * I915_RELOC0_STRIDE; - dri_bo *target_bo = bo_ttm->relocs[j].target_buf; - dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo; - - DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n", - i, - bo_ttm->name, reloc_entry[0], - target_ttm->name, target_bo->offset, - reloc_entry[1]); - } - } else { - DBG("%2d: %s\n", i, bo_ttm->name); - } - } -} - -/** - * Adds the given buffer to the list of buffers to be validated (moved into the - * appropriate memory type) with the next batch submission. - * - * If a buffer is validated multiple times in a batch submission, it ends up - * with the intersection of the memory type flags and the union of the - * access flags. - */ -static void -intel_add_validate_buffer(dri_bo *buf, - uint64_t flags) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing, - * do that now. - */ - if (ttm_buf->delayed_unmap) { - drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - ttm_buf->delayed_unmap = GL_FALSE; - } - - if (ttm_buf->validate_index == -1) { - struct intel_validate_entry *entry; - struct drm_i915_op_arg *arg; - struct drm_bo_op_req *req; - int index; - - /* Extend the array of validation entries as necessary. */ - if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) { - int i, new_size = bufmgr_ttm->validate_array_size * 2; - - if (new_size == 0) - new_size = 5; - - bufmgr_ttm->validate_array = - realloc(bufmgr_ttm->validate_array, - sizeof(struct intel_validate_entry) * new_size); - bufmgr_ttm->validate_array_size = new_size; - - /* Update pointers for realloced mem. */ - for (i = 0; i < bufmgr_ttm->validate_count - 1; i++) { - bufmgr_ttm->validate_array[i].bo_arg.next = (unsigned long) - &bufmgr_ttm->validate_array[i + 1].bo_arg; - } - } - - /* Pick out the new array entry for ourselves */ - index = bufmgr_ttm->validate_count; - ttm_buf->validate_index = index; - entry = &bufmgr_ttm->validate_array[index]; - bufmgr_ttm->validate_count++; - - /* Fill in array entry */ - entry->bo = buf; - dri_bo_reference(buf); - - /* Fill in kernel arg */ - arg = &entry->bo_arg; - req = &arg->d.req; - - memset(arg, 0, sizeof(*arg)); - req->bo_req.handle = ttm_buf->drm_bo.handle; - req->op = drm_bo_validate; - req->bo_req.flags = flags; - req->bo_req.hint = 0; -#ifdef DRM_BO_HINT_PRESUMED_OFFSET - /* PRESUMED_OFFSET indicates that all relocations pointing at this - * buffer have the correct offset. If any of our relocations don't, - * this flag will be cleared off the buffer later in the relocation - * processing. - */ - req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET; - req->bo_req.presumed_offset = buf->offset; -#endif - req->bo_req.mask = INTEL_BO_MASK; - req->bo_req.fence_class = 0; /* Backwards compat. */ - - if (ttm_buf->reloc_buf_data != NULL) - arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data; - else - arg->reloc_ptr = 0; - - /* Hook up the linked list of args for the kernel */ - arg->next = 0; - if (index != 0) { - bufmgr_ttm->validate_array[index - 1].bo_arg.next = - (unsigned long)arg; - } - } else { - struct intel_validate_entry *entry = - &bufmgr_ttm->validate_array[ttm_buf->validate_index]; - struct drm_i915_op_arg *arg = &entry->bo_arg; - struct drm_bo_op_req *req = &arg->d.req; - uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM; - uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM; - - /* Buffer was already in the validate list. Extend its flags as - * necessary. - */ - - if (memFlags == 0) { - fprintf(stderr, - "%s: No shared memory types between " - "0x%16llx and 0x%16llx\n", - __FUNCTION__, req->bo_req.flags, flags); - abort(); - } - if (flags & ~INTEL_BO_MASK) { - fprintf(stderr, - "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n", - __FUNCTION__, flags & ~INTEL_BO_MASK); - abort(); - } - req->bo_req.flags = memFlags | modeFlags; - } -} - - -#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ - sizeof(uint32_t)) - -static int -intel_setup_reloc_list(dri_bo *bo) -{ - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; - - bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs, - sizeof(struct dri_ttm_reloc)); - bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs)); - - /* Initialize the relocation list with the header: - * DWORD 0: relocation count - * DWORD 1: relocation type - * DWORD 2+3: handle to next relocation list (currently none) 64-bits - */ - bo_ttm->reloc_buf_data[0] = 0; - bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0; - bo_ttm->reloc_buf_data[2] = 0; - bo_ttm->reloc_buf_data[3] = 0; - - return 0; -} - -#if 0 -int -driFenceSignaled(DriFenceObject * fence, unsigned type) -{ - int signaled; - int ret; - - if (fence == NULL) - return GL_TRUE; - - ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled); - BM_CKFATAL(ret); - return signaled; -} -#endif - -static dri_bo * -dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, - uint64_t location_mask) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_bo_ttm *ttm_buf; - unsigned int pageSize = getpagesize(); - int ret; - uint64_t flags; - unsigned int hint; - unsigned long alloc_size; - struct dri_ttm_bo_bucket *bucket; - GLboolean alloc_from_cache = GL_FALSE; - - ttm_buf = calloc(1, sizeof(*ttm_buf)); - if (!ttm_buf) - return NULL; - - /* The mask argument doesn't do anything for us that we want other than - * determine which pool (TTM or local) the buffer is allocated into, so - * just pass all of the allocation class flags. - */ - flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_EXE; - /* No hints we want to use. */ - hint = 0; - - /* Round the allocated size up to a power of two number of pages. */ - alloc_size = 1 << logbase2(size); - if (alloc_size < pageSize) - alloc_size = pageSize; - bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size); - - /* If we don't have caching at this size, don't actually round the - * allocation up. - */ - if (bucket == NULL || bucket->max_entries == 0) - alloc_size = size; - - /* Get a buffer out of the cache if available */ - if (bucket != NULL && bucket->num_entries > 0) { - struct dri_ttm_bo_bucket_entry *entry = bucket->head; - int busy; - - /* Check if the buffer is still in flight. If not, reuse it. */ - ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy); - alloc_from_cache = (ret == 0 && busy == 0); - - if (alloc_from_cache) { - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - ttm_buf->drm_bo = entry->drm_bo; - free(entry); - } - } - - if (!alloc_from_cache) { - ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize, - NULL, flags, hint, &ttm_buf->drm_bo); - if (ret != 0) { - free(ttm_buf); - return NULL; - } - } - - ttm_buf->bo.size = size; - ttm_buf->bo.offset = ttm_buf->drm_bo.offset; - ttm_buf->bo.virtual = NULL; - ttm_buf->bo.bufmgr = bufmgr; - ttm_buf->name = name; - ttm_buf->refcount = 1; - ttm_buf->reloc_buf_data = NULL; - ttm_buf->relocs = NULL; - ttm_buf->last_flags = ttm_buf->drm_bo.flags; - ttm_buf->shared = GL_FALSE; - ttm_buf->delayed_unmap = GL_FALSE; - ttm_buf->validate_index = -1; - - DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size); - - return &ttm_buf->bo; -} - -/* Our TTM backend doesn't allow creation of static buffers, as that requires - * privelege for the non-fake case, and the lock in the fake case where we were - * working around the X Server not creating buffers and passing handles to us. - */ -static dri_bo * -dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name, - unsigned long offset, unsigned long size, void *virtual, - uint64_t location_mask) -{ - return NULL; -} - -/** - * Returns a dri_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -dri_bo * -intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_bo_ttm *ttm_buf; - int ret; - - ttm_buf = calloc(1, sizeof(*ttm_buf)); - if (!ttm_buf) - return NULL; - - ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", - name, handle, strerror(-ret)); - free(ttm_buf); - return NULL; - } - ttm_buf->bo.size = ttm_buf->drm_bo.size; - ttm_buf->bo.offset = ttm_buf->drm_bo.offset; - ttm_buf->bo.virtual = NULL; - ttm_buf->bo.bufmgr = bufmgr; - ttm_buf->name = name; - ttm_buf->refcount = 1; - ttm_buf->reloc_buf_data = NULL; - ttm_buf->relocs = NULL; - ttm_buf->last_flags = ttm_buf->drm_bo.flags; - ttm_buf->shared = GL_TRUE; - ttm_buf->delayed_unmap = GL_FALSE; - ttm_buf->validate_index = -1; - - DBG("bo_create_from_handle: %p %08x (%s)\n", - &ttm_buf->bo, handle, ttm_buf->name); - - return &ttm_buf->bo; -} - -static void -dri_ttm_bo_reference(dri_bo *buf) -{ - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - ttm_buf->refcount++; -} - -static void -dri_ttm_bo_unreference(dri_bo *buf) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - - if (!buf) - return; - - if (--ttm_buf->refcount == 0) { - struct dri_ttm_bo_bucket *bucket; - int ret; - - assert(ttm_buf->map_count == 0); - - if (ttm_buf->reloc_buf_data) { - int i; - - /* Unreference all the target buffers */ - for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++) - dri_bo_unreference(ttm_buf->relocs[i].target_buf); - free(ttm_buf->relocs); - - /* Free the kernel BO containing relocation entries */ - free(ttm_buf->reloc_buf_data); - ttm_buf->reloc_buf_data = NULL; - } - - if (ttm_buf->delayed_unmap) { - int ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - - if (ret != 0) { - fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - } - - bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size); - /* Put the buffer into our internal cache for reuse if we can. */ - if (!ttm_buf->shared && - bucket != NULL && - (bucket->max_entries == -1 || - (bucket->max_entries > 0 && - bucket->num_entries < bucket->max_entries))) - { - struct dri_ttm_bo_bucket_entry *entry; - - entry = calloc(1, sizeof(*entry)); - entry->drm_bo = ttm_buf->drm_bo; - - entry->next = NULL; - *bucket->tail = entry; - bucket->tail = &entry->next; - bucket->num_entries++; - } else { - /* Decrement the kernel refcount for the buffer. */ - ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed (%s): %s\n", - ttm_buf->name, strerror(-ret)); - } - } - - DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - free(buf); - return; - } -} - -static int -dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable) -{ - dri_bufmgr_ttm *bufmgr_ttm; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - uint64_t flags; - int ret; - - bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - - flags = DRM_BO_FLAG_READ; - if (write_enable) - flags |= DRM_BO_FLAG_WRITE; - - /* Allow recursive mapping. Mesa may recursively map buffers with - * nested display loops. - */ - if (ttm_buf->map_count++ != 0) - return 0; - - assert(buf->virtual == NULL); - - DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - /* XXX: What about if we're upgrading from READ to WRITE? */ - if (ttm_buf->delayed_unmap) { - buf->virtual = ttm_buf->saved_virtual; - return 0; - } - - ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - - return ret; -} - -static int -dri_ttm_bo_unmap(dri_bo *buf) -{ - dri_bufmgr_ttm *bufmgr_ttm; - dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf; - int ret; - - if (buf == NULL) - return 0; - - assert(ttm_buf->map_count != 0); - if (--ttm_buf->map_count != 0) - return 0; - - bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; - - assert(buf->virtual != NULL); - - DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name); - - if (!ttm_buf->shared) { - ttm_buf->saved_virtual = buf->virtual; - ttm_buf->delayed_unmap = GL_TRUE; - buf->virtual = NULL; - - return 0; - } - - buf->virtual = NULL; - - ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n", - __FILE__, __LINE__, ttm_buf->name, strerror(-ret)); - } - - return ret; -} - -/** - * Returns a dri_bo wrapping the given buffer object handle. - * - * This can be used when one application needs to pass a buffer object - * to another. - */ -dri_fence * -intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, - drm_fence_arg_t *arg) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - dri_fence_ttm *ttm_fence; - - ttm_fence = malloc(sizeof(*ttm_fence)); - if (!ttm_fence) - return NULL; - - ttm_fence->drm_fence.handle = arg->handle; - ttm_fence->drm_fence.fence_class = arg->fence_class; - ttm_fence->drm_fence.type = arg->type; - ttm_fence->drm_fence.flags = arg->flags; - ttm_fence->drm_fence.signaled = 0; - ttm_fence->drm_fence.sequence = arg->sequence; - - ttm_fence->fence.bufmgr = bufmgr; - ttm_fence->name = name; - ttm_fence->refcount = 1; - - DBG("fence_create_from_handle: %p (%s)\n", - &ttm_fence->fence, ttm_fence->name); - - return &ttm_fence->fence; -} - - -static void -dri_ttm_fence_reference(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - - ++fence_ttm->refcount; - DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); -} - -static void -dri_ttm_fence_unreference(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - - if (!fence) - return; - - DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); - - if (--fence_ttm->refcount == 0) { - int ret; - - ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence); - if (ret != 0) { - fprintf(stderr, "drmFenceUnreference failed (%s): %s\n", - fence_ttm->name, strerror(-ret)); - } - - free(fence); - return; - } -} - -static void -dri_ttm_fence_wait(dri_fence *fence) -{ - dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence; - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr; - int ret; - - ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0); - if (ret != 0) { - fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n", - __FILE__, __LINE__, fence_ttm->name, strerror(-ret)); - abort(); - } - - DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name); -} - -static void -dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - int i; - - free(bufmgr_ttm->validate_array); - - /* Free any cached buffer objects we were going to reuse */ - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { - struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i]; - struct dri_ttm_bo_bucket_entry *entry; - - while ((entry = bucket->head) != NULL) { - int ret; - - bucket->head = entry->next; - if (entry->next == NULL) - bucket->tail = &bucket->head; - bucket->num_entries--; - - /* Decrement the kernel refcount for the buffer. */ - ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo); - if (ret != 0) { - fprintf(stderr, "drmBOUnreference failed: %s\n", - strerror(-ret)); - } - - free(entry); - } - } - - free(bufmgr); -} - -/** - * Adds the target buffer to the validation list and adds the relocation - * to the reloc_buffer's relocation list. - * - * The relocation entry at the given offset must already contain the - * precomputed relocation value, because the kernel will optimize out - * the relocation entry write when the buffer hasn't moved from the - * last known offset in target_buf. - */ -static int -dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta, - GLuint offset, dri_bo *target_buf) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr; - dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf; - dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf; - int num_relocs; - uint32_t *this_reloc; - - /* Create a new relocation list if needed */ - if (reloc_buf_ttm->reloc_buf_data == NULL) - intel_setup_reloc_list(reloc_buf); - - num_relocs = reloc_buf_ttm->reloc_buf_data[0]; - - /* Check overflow */ - assert(num_relocs < bufmgr_ttm->max_relocs); - - this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER + - num_relocs * I915_RELOC0_STRIDE; - - this_reloc[0] = offset; - this_reloc[1] = delta; - this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */ - this_reloc[3] = 0; - - reloc_buf_ttm->relocs[num_relocs].validate_flags = flags; - reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf; - dri_bo_reference(target_buf); - - reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */ - /* Check wraparound */ - assert(reloc_buf_ttm->reloc_buf_data[0] != 0); - return 0; -} - -/** - * Walk the tree of relocations rooted at BO and accumulate the list of - * validations to be performed and update the relocation buffers with - * index values into the validation list. - */ -static void -dri_ttm_bo_process_reloc(dri_bo *bo) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - unsigned int nr_relocs; - int i; - - if (bo_ttm->reloc_buf_data == NULL) - return; - - nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; - - for (i = 0; i < nr_relocs; i++) { - struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; - - /* Continue walking the tree depth-first. */ - dri_ttm_bo_process_reloc(r->target_buf); - - /* Add the target to the validate list */ - intel_add_validate_buffer(r->target_buf, r->validate_flags); - - /* Clear the PRESUMED_OFFSET flag from the validate list entry of the - * target if this buffer has a stale relocated pointer at it. - */ - if (r->last_target_offset != r->target_buf->offset) { - dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf; - struct intel_validate_entry *entry = - &bufmgr_ttm->validate_array[target_buf_ttm->validate_index]; - - entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET; - } - } -} - -static void * -dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; - - /* Update indices and set up the validate list. */ - dri_ttm_bo_process_reloc(batch_buf); - - /* Add the batch buffer to the validation list. There are no relocations - * pointing to it. - */ - intel_add_validate_buffer(batch_buf, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE); - - *count = bufmgr_ttm->validate_count; - return &bufmgr_ttm->validate_array[0].bo_arg; -} - -static const char * -intel_get_flags_mem_type_string(uint64_t flags) -{ - switch (flags & DRM_BO_MASK_MEM) { - case DRM_BO_FLAG_MEM_LOCAL: return "local"; - case DRM_BO_FLAG_MEM_TT: return "ttm"; - case DRM_BO_FLAG_MEM_VRAM: return "vram"; - case DRM_BO_FLAG_MEM_PRIV0: return "priv0"; - case DRM_BO_FLAG_MEM_PRIV1: return "priv1"; - case DRM_BO_FLAG_MEM_PRIV2: return "priv2"; - case DRM_BO_FLAG_MEM_PRIV3: return "priv3"; - case DRM_BO_FLAG_MEM_PRIV4: return "priv4"; - default: return NULL; - } -} - -static const char * -intel_get_flags_caching_string(uint64_t flags) -{ - switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) { - case 0: return "UU"; - case DRM_BO_FLAG_CACHED: return "CU"; - case DRM_BO_FLAG_CACHED_MAPPED: return "UC"; - case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC"; - default: return NULL; - } -} - -static void -intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm) -{ - int i; - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg; - struct drm_bo_arg_rep *rep = &arg->d.rep; - - /* Update the flags */ - if (rep->bo_info.flags != bo_ttm->last_flags) { - DBG("BO %s migrated: %s/%s -> %s/%s\n", - bo_ttm->name, - intel_get_flags_mem_type_string(bo_ttm->last_flags), - intel_get_flags_caching_string(bo_ttm->last_flags), - intel_get_flags_mem_type_string(rep->bo_info.flags), - intel_get_flags_caching_string(rep->bo_info.flags)); - - bo_ttm->last_flags = rep->bo_info.flags; - } - /* Update the buffer offset */ - if (rep->bo_info.offset != bo->offset) { - DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n", - bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset); - bo->offset = rep->bo_info.offset; - } - } -} - -/** - * Update the last target offset field of relocation entries for PRESUMED_OFFSET - * computation. - */ -static void -dri_ttm_bo_post_submit(dri_bo *bo) -{ - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - unsigned int nr_relocs; - int i; - - if (bo_ttm->reloc_buf_data == NULL) - return; - - nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff; - - for (i = 0; i < nr_relocs; i++) { - struct dri_ttm_reloc *r = &bo_ttm->relocs[i]; - - /* Continue walking the tree depth-first. */ - dri_ttm_bo_post_submit(r->target_buf); - - r->last_target_offset = r->target_buf->offset; - } -} - -static void -dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr; - int i; - - intel_update_buffer_offsets (bufmgr_ttm); - - dri_ttm_bo_post_submit(batch_buf); - - if (bufmgr_ttm->bufmgr.debug) - dri_ttm_dump_validation_list(bufmgr_ttm); - - for (i = 0; i < bufmgr_ttm->validate_count; i++) { - dri_bo *bo = bufmgr_ttm->validate_array[i].bo; - dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo; - - /* Disconnect the buffer from the validate list */ - bo_ttm->validate_index = -1; - dri_bo_unreference(bo); - bufmgr_ttm->validate_array[i].bo = NULL; - } - bufmgr_ttm->validate_count = 0; -} - -/** - * Enables unlimited caching of buffer objects for reuse. - * - * This is potentially very memory expensive, as the cache at each bucket - * size is only bounded by how many buffers of that size we've managed to have - * in flight at once. - */ -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr) -{ - dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr; - int i; - - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) { - bufmgr_ttm->cache_bucket[i].max_entries = -1; - } -} - -/* - * - */ -static int -dri_ttm_check_aperture_space(dri_bo *bo) -{ - return 0; -} - -/** - * Initializes the TTM buffer manager, which uses the kernel to allocate, map, - * and manage map buffer objections. - * - * \param fd File descriptor of the opened DRM device. - * \param fence_type Driver-specific fence type used for fences with no flush. - * \param fence_type_flush Driver-specific fence type used for fences with a - * flush. - */ -dri_bufmgr * -intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size) -{ - dri_bufmgr_ttm *bufmgr_ttm; - int i; - - bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm)); - bufmgr_ttm->fd = fd; - bufmgr_ttm->fence_type = fence_type; - bufmgr_ttm->fence_type_flush = fence_type_flush; - - /* Let's go with one relocation per every 2 dwords (but round down a bit - * since a power of two will mean an extra page allocation for the reloc - * buffer). - * - * Every 4 was too few for the blender benchmark. - */ - bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; - - bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc; - bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static; - bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference; - bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference; - bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map; - bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap; - bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference; - bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference; - bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait; - bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy; - bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc; - bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc; - bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit; - bufmgr_ttm->bufmgr.debug = GL_FALSE; - bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space; - /* Initialize the linked lists for BO reuse cache. */ - for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) - bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head; - - return &bufmgr_ttm->bufmgr; -} - diff --git a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h deleted file mode 100644 index d267a168cd..0000000000 --- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h +++ /dev/null @@ -1,20 +0,0 @@ - -#ifndef INTEL_BUFMGR_TTM_H -#define INTEL_BUFMGR_TTM_H - -#include "dri_bufmgr.h" - -extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, - unsigned int handle); - -dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name, - drm_fence_arg_t *arg); - - -dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type, - unsigned int fence_type_flush, int batch_size); - -void -intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr); - -#endif diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index e1941c302c..f33805ba05 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -59,8 +59,7 @@ #include "intel_buffer_objects.h" #include "intel_fbo.h" #include "intel_decode.h" -#include "intel_bufmgr_fake.h" -#include "intel_bufmgr_gem.h" +#include "intel_bufmgr.h" #include "drirenderbuffer.h" #include "vblank.h" @@ -474,7 +473,7 @@ intel_init_bufmgr(struct intel_context *intel) case DRI_CONF_BO_REUSE_DISABLED: break; case DRI_CONF_BO_REUSE_ALL: - intel_gem_enable_bo_reuse(intel->bufmgr); + intel_bufmgr_gem_enable_reuse(intel->bufmgr); break; } } @@ -493,12 +492,12 @@ intel_init_bufmgr(struct intel_context *intel) return GL_FALSE; } - intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset, - intelScreen->tex.map, - intelScreen->tex.size, - intel_fence_emit, - intel_fence_wait, - intel); + intel->bufmgr = intel_bufmgr_fake_init(intelScreen->tex.offset, + intelScreen->tex.map, + intelScreen->tex.size, + intel_fence_emit, + intel_fence_wait, + intel); } /* XXX bufmgr should be per-screen, not per-context */ @@ -873,7 +872,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags) */ if (!intel->ttm && sarea->texAge != intel->hHWContext) { sarea->texAge = intel->hHWContext; - dri_bufmgr_fake_contended_lock_take(intel->bufmgr); + intel_bufmgr_fake_contended_lock_take(intel->bufmgr); if (INTEL_DEBUG & DEBUG_BATCH) intel_decode_context_reset(); if (INTEL_DEBUG & DEBUG_BUFMGR) diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 35ef22aa27..579883437f 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -35,6 +35,7 @@ #include "mm.h" #include "texmem.h" #include "dri_bufmgr.h" +#include "intel_bufmgr.h" #include "intel_screen.h" #include "intel_tex_obj.h" diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index 591548ae85..58c81766cd 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -45,7 +45,7 @@ #include "drm.h" #include "i915_drm.h" -#include "intel_bufmgr_gem.h" +#include "intel_bufmgr.h" #define FILE_DEBUG_FLAG DEBUG_IOCTL diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 7d78e4eca7..c7e2c551dd 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -44,7 +44,7 @@ #include "intel_blit.h" #include "intel_buffer_objects.h" #include "dri_bufmgr.h" -#include "intel_bufmgr_gem.h" +#include "intel_bufmgr.h" #include "intel_batchbuffer.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -106,10 +106,7 @@ intel_region_alloc(struct intel_context *intel, dri_bo *buffer; buffer = dri_bo_alloc(intel->bufmgr, "region", - pitch * cpp * height, 64, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + pitch * cpp * height, 64); return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer); } @@ -121,7 +118,7 @@ intel_region_alloc_for_handle(struct intel_context *intel, { dri_bo *buffer; - buffer = intel_gem_bo_create_from_handle(intel->bufmgr, "region", handle); + buffer = intel_bo_gem_create_from_name(intel->bufmgr, "region", handle); return intel_region_alloc_internal(intel, cpp, pitch, height, tiled, buffer); @@ -355,10 +352,7 @@ intel_region_release_pbo(struct intel_context *intel, region->buffer = dri_bo_alloc(intel->bufmgr, "region", region->pitch * region->cpp * region->height, - 64, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + 64); } /* Break the COW tie to the pbo. Both the pbo and the region end up @@ -440,17 +434,16 @@ intel_recreate_static(struct intel_context *intel, if (intel->ttm) { assert(region_desc->bo_handle != -1); - region->buffer = intel_gem_bo_create_from_handle(intel->bufmgr, - name, - region_desc->bo_handle); + region->buffer = intel_bo_gem_create_from_name(intel->bufmgr, + name, + region_desc->bo_handle); } else { - region->buffer = dri_bo_alloc_static(intel->bufmgr, - name, - region_desc->offset, - intelScreen->pitch * - intelScreen->height, - region_desc->map, - DRM_BO_FLAG_MEM_TT); + region->buffer = intel_bo_fake_alloc_static(intel->bufmgr, + name, + region_desc->offset, + intelScreen->pitch * + intelScreen->height, + region_desc->map); } assert(region->buffer != NULL); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index a243324a39..f325e70359 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,7 +49,7 @@ #include "i830_dri.h" #include "intel_regions.h" #include "intel_batchbuffer.h" -#include "intel_bufmgr_gem.h" +#include "intel_bufmgr.h" PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN -- cgit v1.2.3 From f5d03af109e0c6fdd7cc6786f8ae40f8ae626f5a Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Fri, 6 Jun 2008 13:05:46 -0700 Subject: [intel-gem] Call the new throttle ioctl from swap buffers Swap buffers is a fairly reasonable time to wait for the hardware for a while; this keeps us from overrunning the ring. --- src/mesa/drivers/dri/intel/intel_buffers.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 2a25f079e9..d5257ae27e 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -848,6 +848,8 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv) intel_fb->swap_ust = ust; } + drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); + } else { /* XXX this shouldn't be an error but we can't handle it for now */ -- cgit v1.2.3 From 0227d91a9e3cf65aae3266d100eebd3459dff4c3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 11 Jun 2008 11:45:51 -0700 Subject: [gem] Enable bo_reuse by default. The objects are swappable, so we're less concerned by excessive object allocation now, and it's about a 20% performance improvement. If we get concerns about the memory consumption from others, we can look into a compromise position later. --- src/mesa/drivers/dri/intel/intel_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index f325e70359..09a30e80a5 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -59,7 +59,7 @@ PUBLIC const char __driConfigOptions[] = /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, * DRI_CONF_BO_REUSE_ALL */ - DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1") + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1") DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") DRI_CONF_ENUM(0, "Disable buffer object reuse") DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") -- cgit v1.2.3 From 407ce3da3c53c9ebba0fbf827d7b0f610122d44b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 11 Jun 2008 14:44:48 -0700 Subject: [intel-gem] Chase domain flag renaming in the DRM. This is an API breakage only. --- src/mesa/drivers/dri/i915/i830_vtbl.c | 6 +++--- src/mesa/drivers/dri/i915/i915_vtbl.c | 6 +++--- src/mesa/drivers/dri/i965/brw_cc.c | 2 +- src/mesa/drivers/dri/i965/brw_clip_state.c | 2 +- src/mesa/drivers/dri/i965/brw_curbe.c | 2 +- src/mesa/drivers/dri/i965/brw_draw_upload.c | 6 +++--- src/mesa/drivers/dri/i965/brw_gs_state.c | 2 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 16 ++++++++-------- src/mesa/drivers/dri/i965/brw_sf_state.c | 4 ++-- src/mesa/drivers/dri/i965/brw_vs_state.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_state.c | 4 ++-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 10 +++++----- src/mesa/drivers/dri/intel/intel_blit.c | 18 +++++++++--------- 14 files changed, 41 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 86bf468a7e..cff051b16b 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -490,14 +490,14 @@ i830_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, state->draw_region->draw_offset); if (state->depth_region) { OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, state->depth_region->draw_offset); } @@ -524,7 +524,7 @@ i830_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], - DRM_GEM_DOMAIN_I915_SAMPLER, 0, + I915_GEM_DOMAIN_SAMPLER, 0, state->tex_offset[i] | TM0S0_USE_FENCE); } else if (state == &i830->meta) { diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index de1ec5effc..43f5703d9e 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -377,14 +377,14 @@ i915_emit_state(struct intel_context *intel) OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]); OUT_RELOC(state->draw_region->buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, state->draw_region->draw_offset); if (state->depth_region) { OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]); OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]); OUT_RELOC(state->depth_region->buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, state->depth_region->draw_offset); } @@ -427,7 +427,7 @@ i915_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], - DRM_GEM_DOMAIN_I915_SAMPLER, 0, + I915_GEM_DOMAIN_SAMPLER, 0, state->tex_offset[i]); } else if (state == &i915->meta) { diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index afcfbcccb9..49a80d3e4a 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -257,7 +257,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) /* Emit CC viewport relocation */ intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0, offsetof(struct brw_cc_unit_state, cc4), diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index fd5157bdb7..22bd38a9f3 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -120,7 +120,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Emit clip program relocation */ assert(brw->clip.prog_bo); intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION, 0, clip.thread0.grf_reg_count << 1, offsetof(struct brw_clip_unit_state, thread0), diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index bd0b04c36f..b603be8fc1 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -351,7 +351,7 @@ static void emit_constant_buffer(struct brw_context *brw) } else { OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); OUT_RELOC(brw->curbe.curbe_bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, (sz - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 18ba02423d..0181b06764 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -467,7 +467,7 @@ void brw_emit_vertices( struct brw_context *brw, BRW_VB0_ACCESS_VERTEXDATA | (input->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(input->bo, - DRM_GEM_DOMAIN_I915_VERTEX, 0, + I915_GEM_DOMAIN_VERTEX, 0, input->offset); OUT_BATCH(max_index); OUT_BATCH(0); /* Instance data step rate */ @@ -589,10 +589,10 @@ void brw_emit_indices(struct brw_context *brw, BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); OUT_RELOC( bo, - DRM_GEM_DOMAIN_I915_VERTEX, 0, + I915_GEM_DOMAIN_VERTEX, 0, offset); OUT_RELOC( bo, - DRM_GEM_DOMAIN_I915_VERTEX, 0, + I915_GEM_DOMAIN_VERTEX, 0, offset + ib_size); OUT_BATCH( 0 ); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 953ccf777f..ae6b48a517 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -107,7 +107,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (key->prog_active) { /* Emit GS program relocation */ intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, gs.thread0.grf_reg_count << 1, offsetof(struct brw_gs_unit_state, thread0), brw->gs.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index c941e054a3..9d925682c2 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -89,7 +89,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ OUT_RELOC(brw->wm.bind_bo, - DRM_GEM_DOMAIN_I915_SAMPLER, 0, + I915_GEM_DOMAIN_SAMPLER, 0, 0); ADVANCE_BATCH(); } @@ -116,18 +116,18 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->vs.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); + OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); if (brw->gs.prog_active) - OUT_RELOC(brw->gs.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 1); + OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); else OUT_BATCH(0); if (!brw->metaops.active) - OUT_RELOC(brw->clip.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 1); + OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); else OUT_BATCH(0); - OUT_RELOC(brw->sf.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); - OUT_RELOC(brw->wm.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.state_bo, DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, 0); + OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; @@ -235,7 +235,7 @@ static void emit_depthbuffer(struct brw_context *brw) (region->tiled << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(region->buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((region->pitch - 1) << 6) | diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index e8f36718a3..cbed301d31 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -254,14 +254,14 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* Emit SF program relocation */ intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, sf.thread0.grf_reg_count << 1, offsetof(struct brw_sf_unit_state, thread0), brw->sf.prog_bo); /* Emit SF viewport relocation */ intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), offsetof(struct brw_sf_unit_state, sf5), brw->sf.vp_bo); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index a6b3db69ea..e18cd42f4e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -116,7 +116,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) /* Emit VS program relocation */ intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, vs.thread0.grf_reg_count << 1, offsetof(struct brw_vs_unit_state, thread0), brw->vs.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 2e0aff7ab2..461f977aac 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -306,7 +306,7 @@ static int upload_wm_samplers( struct brw_context *brw ) ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]); intel_bo_emit_reloc(brw->wm.sampler_bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0, i * sizeof(struct brw_sampler_state) + offsetof(struct brw_sampler_state, ss2), diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index ef78d71bbb..6fe30f0a9a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -200,7 +200,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit WM program relocation */ intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, wm.thread0.grf_reg_count << 1, offsetof(struct brw_wm_unit_state, thread0), brw->wm.prog_bo); @@ -217,7 +217,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit sampler state relocation */ if (key->sampler_count != 0) { intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), offsetof(struct brw_wm_unit_state, wm4), brw->wm.sampler_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 6fc6d9dfd8..a7da5e643c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -204,7 +204,7 @@ brw_create_texture_surface( struct brw_context *brw, /* Emit relocation to surface contents */ intel_bo_emit_reloc(bo, - DRM_GEM_DOMAIN_I915_SAMPLER, 0, + I915_GEM_DOMAIN_SAMPLER, 0, 0, offsetof(struct brw_surface_state, ss1), key->bo); @@ -342,9 +342,9 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * a more restrictive relocation to emit. */ intel_bo_emit_reloc(brw->wm.surf_bo[unit], - DRM_GEM_DOMAIN_I915_RENDER | - DRM_GEM_DOMAIN_I915_SAMPLER, - DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_SAMPLER, + I915_GEM_DOMAIN_RENDER, 0, offsetof(struct brw_surface_state, ss1), region_bo); @@ -392,7 +392,7 @@ brw_wm_get_binding_table(struct brw_context *brw) for (i = 0; i < BRW_WM_MAX_SURF; i++) { if (brw->wm.surf_bo[i] != NULL) { intel_bo_emit_reloc(bind_bo, - DRM_GEM_DOMAIN_I915_INSTRUCTION, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0, i * sizeof(GLuint), brw->wm.surf_bo[i]); diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index e8d2ad0ae4..174f5ecab0 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -149,12 +149,12 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, OUT_BATCH((box.y2 << 16) | box.x2); OUT_RELOC(dst->buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); OUT_RELOC(src->buffer, - DRM_GEM_DOMAIN_I915_RENDER, 0, + I915_GEM_DOMAIN_RENDER, 0, 0); ADVANCE_BATCH(); } @@ -225,7 +225,7 @@ intelEmitFillBlit(struct intel_context *intel, OUT_BATCH((y << 16) | x); OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC(dst_buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset); OUT_BATCH(color); ADVANCE_BATCH(); @@ -347,12 +347,12 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH((dst_y << 16) | dst_x); OUT_BATCH((dst_y2 << 16) | dst_x2); OUT_RELOC(dst_buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset); OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(src_pitch); OUT_RELOC(src_buffer, - DRM_GEM_DOMAIN_I915_RENDER, 0, + I915_GEM_DOMAIN_RENDER, 0, src_offset); ADVANCE_BATCH(); } @@ -366,12 +366,12 @@ intelEmitCopyBlit(struct intel_context *intel, OUT_BATCH((0 << 16) | dst_x); OUT_BATCH((h << 16) | dst_x2); OUT_RELOC(dst_buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset + dst_y * dst_pitch); OUT_BATCH((0 << 16) | src_x); OUT_BATCH(src_pitch); OUT_RELOC(src_buffer, - DRM_GEM_DOMAIN_I915_RENDER, 0, + I915_GEM_DOMAIN_RENDER, 0, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } @@ -551,7 +551,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH((b.y1 << 16) | b.x1); OUT_BATCH((b.y2 << 16) | b.x2); OUT_RELOC(write_buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, irb_region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); @@ -624,7 +624,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ OUT_RELOC(dst_buffer, - DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_offset); OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ -- cgit v1.2.3 From e2baf564d1b9716611d194cf117b329a92ad603d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 17 Jun 2008 11:15:59 -0700 Subject: [intel-gem] Bug #16326: Fix X tile unswizzling on 965. Apparently a bit gets flipped in the addressing for some rows of each tile. --- src/mesa/drivers/dri/intel/intel_span.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 149b581d88..c6778b16ff 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -106,12 +106,38 @@ static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont x_tile_off = xbyte & 0x1ff; y_tile_off = y & 7; +#ifndef I915 + /* The documentation says that X tile layout is arranged in 8 512-byte + * lines of pixel data. However, that doesn't appear to be the case + * on GM965, tested by drawing a 128x8 quad in no_rast mode. For lines + * 1,2,4, and 7 of each tile, each consecutive pair of 64-byte spans + * has the locations of those spans swapped. + */ + switch (y_tile_off) { + case 1: + case 2: + case 4: + case 7: + x_tile_off ^= 64; + break; + default: + break; + } +#endif + x_tile_number = xbyte >> 9; y_tile_number = y >> 3; tile_off = (y_tile_off << 9) + x_tile_off; tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; +#if 0 + printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n", + x, y, tile_off, tile_base, + tile_off + tile_base, + irb->pfPitch, tile_stride); +#endif + return buf + tile_base + tile_off; } -- cgit v1.2.3 From 64adeb163d7da6d75b5664cd2ee3783cadaf63d8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 17 Jun 2008 14:14:02 -0700 Subject: [intel] Fix no_rast option on non-965. The no_rast fallback was getting partially overwritten by later TNL init, resulting in a segfault when things were in a mixed-up state. --- src/mesa/drivers/dri/i915/i830_context.c | 3 +++ src/mesa/drivers/dri/i915/i915_context.c | 3 +++ src/mesa/drivers/dri/intel/intel_context.c | 1 - 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index 166a3bc8e2..d6542f8b3a 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -88,6 +88,9 @@ i830CreateContext(const __GLcontextModes * mesaVis, _tnl_destroy_pipeline(ctx); _tnl_install_pipeline(ctx, intel_pipeline); + if (intel->no_rast) + FALLBACK(intel, INTEL_FALLBACK_USER, 1); + intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS; intel->ctx.Const.MaxTextureImageUnits = I830_TEX_UNITS; intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS; diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 59da40229a..bb77cff96c 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -140,6 +140,9 @@ i915CreateContext(const __GLcontextModes * mesaVis, _tnl_destroy_pipeline(ctx); _tnl_install_pipeline(ctx, intel_pipeline); + if (intel->no_rast) + FALLBACK(intel, INTEL_FALLBACK_USER, 1); + ctx->Const.MaxTextureUnits = I915_TEX_UNITS; ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS; ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index f33805ba05..e668b7326a 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -686,7 +686,6 @@ intelInitContext(struct intel_context *intel, /* Force all software fallbacks */ if (driQueryOptionb(&intel->optionCache, "no_rast")) { fprintf(stderr, "disabling 3D rasterization\n"); - FALLBACK(intel, INTEL_FALLBACK_USER, 1); intel->no_rast = 1; } -- cgit v1.2.3 From bbe80af457316826f56ada767d26e8c1db7f1130 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 18 Jun 2008 14:19:18 -0700 Subject: i915: Restore the accelerated PBO pixel path functions after GEM changes. The fencing code is not required, and waiting on the fences defeated one of the purposes of the extension, which is to allow asynchronous readpixels. --- src/mesa/drivers/dri/i915/Makefile | 8 +++----- src/mesa/drivers/dri/i915/i830_context.c | 2 +- src/mesa/drivers/dri/i915/i915_context.c | 2 +- src/mesa/drivers/dri/i915/intel_pixel_read.c | 9 --------- src/mesa/drivers/dri/intel/intel_pixel_draw.c | 8 -------- 5 files changed, 5 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 74f6169b2e..ccba34d229 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -6,11 +6,6 @@ LIBNAME = i915_dri.so MINIGLX_SOURCES = server/intel_dri.c -PIXEL_SOURCES = \ - intel_pixel.c \ - intel_pixel_read.c \ - intel_pixel_draw.c - DRIVER_SOURCES = \ i830_context.c \ i830_metaops.c \ @@ -48,6 +43,9 @@ DRIVER_SOURCES = \ intel_context.c \ intel_decode.c \ intel_ioctl.c \ + intel_pixel.c \ + intel_pixel_draw.c \ + intel_pixel_read.c \ intel_screen.c \ intel_span.c \ intel_state.c \ diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index d6542f8b3a..66f1566f16 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -52,7 +52,7 @@ static void i830InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); -// intelInitPixelFuncs(functions); + intelInitPixelFuncs(functions); i830InitStateFuncs(functions); i830InitTextureFuncs(functions); } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index bb77cff96c..1128f497db 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -94,7 +94,7 @@ static void i915InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); -// intelInitPixelFuncs(functions); + intelInitPixelFuncs(functions); i915InitStateFunctions(functions); i915InitTextureFuncs(functions); i915InitFragProgFuncs(functions); diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index 2e31656e98..0b95421a25 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -173,7 +173,6 @@ do_blit_readpixels(GLcontext * ctx, struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); GLuint dst_offset; GLuint rowLength; - dri_fence *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); @@ -275,17 +274,9 @@ do_blit_readpixels(GLcontext * ctx, } intel_batchbuffer_flush(intel->batch); - fence = intel->batch->last_fence; - dri_fence_reference(fence); - } UNLOCK_HARDWARE(intel); - if (fence) { - dri_fence_wait(fence); - dri_fence_unreference(fence); - } - if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 2804c8deea..f7082f299e 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -218,7 +218,6 @@ do_blit_drawpixels(GLcontext * ctx, struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj); GLuint src_offset; GLuint rowLength; - dri_fence *fence = NULL; if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s\n", __FUNCTION__); @@ -323,16 +322,9 @@ do_blit_drawpixels(GLcontext * ctx, ctx->Color.LogicOp : GL_COPY); } intel_batchbuffer_flush(intel->batch); - fence = intel->batch->last_fence; - dri_fence_reference(fence); } UNLOCK_HARDWARE(intel); - if (fence) { - dri_fence_wait(fence); - dri_fence_unreference(fence); - } - if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s - DONE\n", __FUNCTION__); -- cgit v1.2.3 From 0da9bc6a69fbd287f2e87ca9f868cb4ccc47735a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 18 Jun 2008 14:35:59 -0700 Subject: i915: Note the non-PBO fallback for textured drawpixels under DEBUG_PIXEL. --- src/mesa/drivers/dri/intel/intel_pixel_draw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index f7082f299e..28cd4f0ba6 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -81,7 +81,8 @@ do_texture_drawpixels(GLcontext * ctx, else { /* PBO only for now: */ -/* _mesa_printf("%s - not PBO\n", __FUNCTION__); */ + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - not PBO\n", __FUNCTION__); return GL_FALSE; } -- cgit v1.2.3 From 62d66caeba786f01f6159c980fda79606afe4c61 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 20 Jun 2008 15:00:44 -0700 Subject: i915: Convert to using VBs instead of inline prims. --- src/mesa/drivers/dri/i915/i830_reg.h | 4 - src/mesa/drivers/dri/i915/i830_vtbl.c | 7 +- src/mesa/drivers/dri/i915/i915_reg.h | 112 --------------- src/mesa/drivers/dri/i915/i915_vtbl.c | 6 +- src/mesa/drivers/dri/i915/intel_render.c | 13 +- src/mesa/drivers/dri/i915/intel_tris.c | 219 ++++++++++++++++++----------- src/mesa/drivers/dri/i915/intel_tris.h | 13 +- src/mesa/drivers/dri/intel/intel_context.h | 14 +- src/mesa/drivers/dri/intel/intel_reg.h | 125 ++++++++++++++++ 9 files changed, 299 insertions(+), 214 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index d1084a84c0..d210c2d08e 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -494,10 +494,6 @@ #define VFT1_TEX0_FMT(x) (x) #define VFT1_TEX0_MASK 3 #define VFT1_TEX1_SHIFT 2 -#define TEXCOORDFMT_2D 0 -#define TEXCOORDFMT_3D 1 -#define TEXCOORDFMT_4D 2 -#define TEXCOORDFMT_1D 3 /*New stuff picked up along the way */ diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index cff051b16b..4d3ad0083a 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -31,6 +31,7 @@ #include "i830_reg.h" #include "intel_batchbuffer.h" #include "intel_regions.h" +#include "intel_tris.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -435,7 +436,8 @@ i830_emit_state(struct intel_context *intel) * Set the space as LOOP_CLIPRECTS now, since that's what our primitives * will be emitted under. */ - intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8, + intel_batchbuffer_require_space(intel->batch, + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, LOOP_CLIPRECTS); count = 0; again: @@ -675,6 +677,9 @@ i830_new_batch(struct intel_context *intel) struct i830_context *i830 = i830_context(&intel->ctx); i830->state.emitted = 0; + /* Signal that we should put new vertices into a new vertex buffer. */ + intel->prim.needs_new_vb = GL_TRUE; + /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!intel->no_batch_wrap); } diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index b718b8610c..8891e11c6f 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -325,118 +325,6 @@ #define SCISSOR_RECT_0_YMAX(x) ((x)<<16) #define SCISSOR_RECT_0_XMAX(x) (x) -/* p189 */ -#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) -#define I1_LOAD_S(n) (1<<(4+n)) - -#define S0_VB_OFFSET_MASK 0xffffffc -#define S0_AUTO_CACHE_INV_DISABLE (1<<0) - -#define S1_VERTEX_WIDTH_SHIFT 24 -#define S1_VERTEX_WIDTH_MASK (0x3f<<24) -#define S1_VERTEX_PITCH_SHIFT 16 -#define S1_VERTEX_PITCH_MASK (0x3f<<16) - -#define TEXCOORDFMT_2D 0x0 -#define TEXCOORDFMT_3D 0x1 -#define TEXCOORDFMT_4D 0x2 -#define TEXCOORDFMT_1D 0x3 -#define TEXCOORDFMT_2D_16 0x4 -#define TEXCOORDFMT_4D_16 0x5 -#define TEXCOORDFMT_NOT_PRESENT 0xf -#define S2_TEXCOORD_FMT0_MASK 0xf -#define S2_TEXCOORD_FMT1_SHIFT 4 -#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) -#define S2_TEXCOORD_NONE (~0) - -/* S3 not interesting */ - -#define S4_POINT_WIDTH_SHIFT 23 -#define S4_POINT_WIDTH_MASK (0x1ff<<23) -#define S4_LINE_WIDTH_SHIFT 19 -#define S4_LINE_WIDTH_ONE (0x2<<19) -#define S4_LINE_WIDTH_MASK (0xf<<19) -#define S4_FLATSHADE_ALPHA (1<<18) -#define S4_FLATSHADE_FOG (1<<17) -#define S4_FLATSHADE_SPECULAR (1<<16) -#define S4_FLATSHADE_COLOR (1<<15) -#define S4_CULLMODE_BOTH (0<<13) -#define S4_CULLMODE_NONE (1<<13) -#define S4_CULLMODE_CW (2<<13) -#define S4_CULLMODE_CCW (3<<13) -#define S4_CULLMODE_MASK (3<<13) -#define S4_VFMT_POINT_WIDTH (1<<12) -#define S4_VFMT_SPEC_FOG (1<<11) -#define S4_VFMT_COLOR (1<<10) -#define S4_VFMT_DEPTH_OFFSET (1<<9) -#define S4_VFMT_XYZ (1<<6) -#define S4_VFMT_XYZW (2<<6) -#define S4_VFMT_XY (3<<6) -#define S4_VFMT_XYW (4<<6) -#define S4_VFMT_XYZW_MASK (7<<6) -#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) -#define S4_FORCE_DEFAULT_SPECULAR (1<<4) -#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) -#define S4_VFMT_FOG_PARAM (1<<2) -#define S4_SPRITE_POINT_ENABLE (1<<1) -#define S4_LINE_ANTIALIAS_ENABLE (1<<0) - -#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ - S4_VFMT_SPEC_FOG | \ - S4_VFMT_COLOR | \ - S4_VFMT_DEPTH_OFFSET | \ - S4_VFMT_XYZW_MASK | \ - S4_VFMT_FOG_PARAM) - - -#define S5_WRITEDISABLE_ALPHA (1<<31) -#define S5_WRITEDISABLE_RED (1<<30) -#define S5_WRITEDISABLE_GREEN (1<<29) -#define S5_WRITEDISABLE_BLUE (1<<28) -#define S5_WRITEDISABLE_MASK (0xf<<28) -#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) -#define S5_LAST_PIXEL_ENABLE (1<<26) -#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) -#define S5_FOG_ENABLE (1<<24) -#define S5_STENCIL_REF_SHIFT 16 -#define S5_STENCIL_REF_MASK (0xff<<16) -#define S5_STENCIL_TEST_FUNC_SHIFT 13 -#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) -#define S5_STENCIL_FAIL_SHIFT 10 -#define S5_STENCIL_FAIL_MASK (0x7<<10) -#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 -#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) -#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 -#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) -#define S5_STENCIL_WRITE_ENABLE (1<<3) -#define S5_STENCIL_TEST_ENABLE (1<<2) -#define S5_COLOR_DITHER_ENABLE (1<<1) -#define S5_LOGICOP_ENABLE (1<<0) - - -#define S6_ALPHA_TEST_ENABLE (1<<31) -#define S6_ALPHA_TEST_FUNC_SHIFT 28 -#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) -#define S6_ALPHA_REF_SHIFT 20 -#define S6_ALPHA_REF_MASK (0xff<<20) -#define S6_DEPTH_TEST_ENABLE (1<<19) -#define S6_DEPTH_TEST_FUNC_SHIFT 16 -#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) -#define S6_CBUF_BLEND_ENABLE (1<<15) -#define S6_CBUF_BLEND_FUNC_SHIFT 12 -#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) -#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 -#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) -#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 -#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) -#define S6_DEPTH_WRITE_ENABLE (1<<3) -#define S6_COLOR_WRITE_ENABLE (1<<2) -#define S6_TRISTRIP_PV_SHIFT 0 -#define S6_TRISTRIP_PV_MASK (0x3<<0) - -#define S7_DEPTH_OFFSET_CONST_MASK ~0 - - /* Helper macros for blend factors */ #define DST_BLND_FACT(f) ((f)<batch, get_state_size(state) + 8, + intel_batchbuffer_require_space(intel->batch, + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, LOOP_CLIPRECTS); count = 0; again: @@ -587,6 +589,8 @@ i915_new_batch(struct intel_context *intel) * difficulties associated with them (physical address requirements). */ i915->state.emitted = 0; + /* Signal that we should put new vertices into a new vertex buffer. */ + intel->prim.needs_new_vb = GL_TRUE; /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!intel->no_batch_wrap); diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index 5e6500cfa1..838d450378 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -67,7 +67,7 @@ #define HAVE_ELTS 0 -static GLuint hw_prim[GL_POLYGON + 1] = { +static uint32_t hw_prim[GL_POLYGON + 1] = { 0, PRIM3D_LINELIST, PRIM3D_LINESTRIP, @@ -114,7 +114,7 @@ intelDmaPrimitive(struct intel_context *intel, GLenum prim) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim)); INTEL_FIREVERTICES(intel); intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]); - intelStartInlinePrimitive(intel, hw_prim[prim], LOOP_CLIPRECTS); + intel_set_prim(intel, hw_prim[prim]); } @@ -126,12 +126,11 @@ do { \ #define FLUSH() INTEL_FIREVERTICES(intel) -#define GET_SUBSEQUENT_VB_MAX_VERTS() \ - ((intel->batch->size - 1500) / (intel->vertex_size*4)) -#define GET_CURRENT_VB_MAX_VERTS() GET_SUBSEQUENT_VB_MAX_VERTS() +#define GET_SUBSEQUENT_VB_MAX_VERTS() (INTEL_VB_SIZE / (intel->vertex_size * 4)) +#define GET_CURRENT_VB_MAX_VERTS() \ + ((INTEL_VB_SIZE - intel->prim.current_offset) / (intel->vertex_size * 4)) -#define ALLOC_VERTS( nr ) \ - intelExtendInlinePrimitive( intel, (nr) * intel->vertex_size ) +#define ALLOC_VERTS(nr) intel_get_prim_space(intel, nr) #define EMIT_VERTS( ctx, j, nr, buf ) \ _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf ) diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index bbb4e0f3cd..a1121925cb 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -25,6 +25,12 @@ * **************************************************************************/ +/** @file intel_tris.c + * + * This file contains functions for managing the vertex buffer and emitting + * primitives into it. + */ + #include "glheader.h" #include "context.h" #include "macros.h" @@ -47,111 +53,159 @@ #include "intel_reg.h" #include "intel_span.h" #include "intel_tex.h" +#include "intel_chipset.h" +#include "i830_context.h" +#include "i830_reg.h" static void intelRenderPrimitive(GLcontext * ctx, GLenum prim); static void intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim); -/* - */ -static void -intel_flush_inline_primitive(struct intel_context *intel) +/** Sets the primitive type for a primitive sequence, flushing as needed. */ +void intel_set_prim(struct intel_context *intel, uint32_t prim) { - GLuint used = intel->batch->ptr - intel->prim.start_ptr; + if (prim != intel->prim.primitive) { + INTEL_FIREVERTICES(intel); + intel->prim.primitive = prim; + } +} - assert(intel->prim.primitive != ~0); +/** Returns mapped VB space for the given number of vertices */ +uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) +{ + uint32_t *addr; + + /* Check for space in the existing VB */ + if (intel->prim.vb_bo == NULL || + intel->prim.needs_new_vb || + (intel->prim.current_offset + + count * intel->vertex_size * 4) > INTEL_VB_SIZE || + (intel->prim.count + count) >= (1 << 16)) { + /* Flush existing prim if any */ + INTEL_FIREVERTICES(intel); -/* _mesa_printf("/\n"); */ + /* Start a new VB */ + dri_bo_unreference(intel->prim.vb_bo); + intel->prim.vb_bo = dri_bo_alloc(intel->bufmgr, "vb", + INTEL_VB_SIZE, 4); + intel->prim.start_offset = 0; + intel->prim.current_offset = 0; - if (used < 8) - goto do_discard; + dri_bufmgr_check_aperture_space(intel->prim.vb_bo); - *(int *) intel->prim.start_ptr = (_3DPRIMITIVE | - intel->prim.primitive | (used / 4 - 2)); + intel->prim.needs_new_vb = GL_FALSE; - goto finished; + dri_bo_map(intel->prim.vb_bo, GL_TRUE); + } - do_discard: - intel->batch->ptr -= used; + intel->prim.flush = intel_flush_prim; - finished: - intel->prim.primitive = ~0; - intel->prim.start_ptr = 0; - intel->prim.flush = 0; -} + addr = (uint32_t *)((char *)intel->prim.vb_bo->virtual + + intel->prim.current_offset); + intel->prim.current_offset += intel->vertex_size * 4 * count; + intel->prim.count += count; + return addr; +} -/* Emit a primitive referencing vertices in a vertex buffer. - */ -void -intelStartInlinePrimitive(struct intel_context *intel, - GLuint prim, GLuint batch_flags) +/** Dispatches the accumulated primitive to the batchbuffer. */ +void intel_flush_prim(struct intel_context *intel) { BATCH_LOCALS; + /* Must be called after an intel_start_prim. */ + assert(intel->prim.primitive != ~0); + + if (intel->prim.count == 0) + return; + intel_wait_flips(intel); + dri_bo_unmap(intel->prim.vb_bo); + intel->vtbl.emit_state(intel); + /* Ensure that we don't start a new batch for the following emit, which + * depends on the state just emitted. emit_state should be making sure we + * have the space for this. + */ intel->no_batch_wrap = GL_TRUE; -/* _mesa_printf("%s *", __progname); */ - - /* Emit a slot which will be filled with the inline primitive - * command later. + /* Check that we actually emitted the state into this batch, using the + * UPLOAD_CTX bit as the signal. */ - BEGIN_BATCH(2, batch_flags); - OUT_BATCH(0); - assert((intel->batch->dirty_state & (1<<1)) == 0); - intel->prim.start_ptr = intel->batch->ptr; - intel->prim.primitive = prim; - intel->prim.flush = intel_flush_inline_primitive; +#if 0 + printf("emitting %d..%d=%d vertices size %d\n", intel->prim.start_offset, + intel->prim.current_offset, intel->prim.count, + intel->vertex_size * 4); +#endif - OUT_BATCH(0); - ADVANCE_BATCH(); + if (IS_9XX(intel->intelScreen->deviceID)) { + BEGIN_BATCH(5, LOOP_CLIPRECTS); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | I1_LOAD_S(1) | 1); + assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK) == 0); + OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0, + intel->prim.start_offset); + OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | + (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); + + OUT_BATCH(_3DPRIMITIVE | + PRIM_INDIRECT | + PRIM_INDIRECT_SEQUENTIAL | + intel->prim.primitive | + intel->prim.count); + OUT_BATCH(0); /* Beginning vertex index */ + ADVANCE_BATCH(); + } else { + struct i830_context *i830 = i830_context(&intel->ctx); + + BEGIN_BATCH(5, LOOP_CLIPRECTS); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | I1_LOAD_S(2) | 1); + /* S0 */ + assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK_830) == 0); + OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0, + intel->prim.start_offset | + (intel->vertex_size << S0_VB_PITCH_SHIFT_830) | + S0_VB_ENABLE_830); + /* S1 + * This is somewhat unfortunate -- VB width is tied up with + * vertex format data that we've already uploaded through + * _3DSTATE_VFT[01]_CMD. We may want to replace emits of VFT state with + * STATE_IMMEDIATE_1 like this to avoid duplication. + */ + OUT_BATCH((i830->state.Ctx[I830_CTXREG_VF] & VFT0_TEX_COUNT_MASK) >> + VFT0_TEX_COUNT_SHIFT << S2_TEX_COUNT_SHIFT_830 | + (i830->state.Ctx[I830_CTXREG_VF2] << 16) | + intel->vertex_size << S2_VERTEX_0_WIDTH_SHIFT_830); + + OUT_BATCH(_3DPRIMITIVE | + PRIM_INDIRECT | + PRIM_INDIRECT_SEQUENTIAL | + intel->prim.primitive | + intel->prim.count); + OUT_BATCH(0); /* Beginning vertex index */ + ADVANCE_BATCH(); + } intel->no_batch_wrap = GL_FALSE; -/* _mesa_printf(">"); */ -} - - -void -intelWrapInlinePrimitive(struct intel_context *intel) -{ - GLuint prim = intel->prim.primitive; - enum cliprect_mode cliprect_mode = intel->batch->cliprect_mode; - - intel_flush_inline_primitive(intel); - intel_batchbuffer_flush(intel->batch); - intelStartInlinePrimitive(intel, prim, cliprect_mode); /* ??? */ -} - -GLuint * -intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords) -{ - GLuint sz = dwords * sizeof(GLuint); - GLuint *ptr; - - assert(intel->prim.flush == intel_flush_inline_primitive); - - if (intel_batchbuffer_space(intel->batch) < sz) - intelWrapInlinePrimitive(intel); - -/* _mesa_printf("."); */ - - intel->vtbl.assert_not_dirty(intel); - - ptr = (GLuint *) intel->batch->ptr; - intel->batch->ptr += sz; - - return ptr; + /* If we're going to keep using this VB for more primitives, map it + * again. + */ + if (!intel->prim.needs_new_vb) + dri_bo_map(intel->prim.vb_bo, GL_TRUE); + + intel->prim.flush = NULL; + intel->prim.start_offset = intel->prim.current_offset; + if (!IS_9XX(intel->intelScreen->deviceID)) + intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); + intel->prim.count = 0; } - - /*********************************************************************** * Emit primitives as inline vertices * ***********************************************************************/ @@ -182,7 +236,7 @@ intel_draw_quad(struct intel_context *intel, intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive(intel, 6 * vertsize); + GLuint *vb = intel_get_prim_space(intel, 6); int j; COPY_DWORDS(j, vb, vertsize, v0); @@ -210,7 +264,7 @@ intel_draw_triangle(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive(intel, 3 * vertsize); + GLuint *vb = intel_get_prim_space(intel, 3); int j; COPY_DWORDS(j, vb, vertsize, v0); @@ -224,7 +278,7 @@ intel_draw_line(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive(intel, 2 * vertsize); + GLuint *vb = intel_get_prim_space(intel, 2); int j; COPY_DWORDS(j, vb, vertsize, v0); @@ -236,7 +290,7 @@ static void intel_draw_point(struct intel_context *intel, intelVertexPtr v0) { GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive(intel, vertsize); + GLuint *vb = intel_get_prim_space(intel, 1); int j; /* Adjust for sub pixel position -- still required for conform. */ @@ -745,7 +799,7 @@ intelFastRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n) { struct intel_context *intel = intel_context(ctx); const GLuint vertsize = intel->vertex_size; - GLuint *vb = intelExtendInlinePrimitive(intel, (n - 2) * 3 * vertsize); + GLuint *vb = intel_get_prim_space(intel, (n - 2) * 3); GLubyte *vertptr = (GLubyte *) intel->verts; const GLuint *start = (const GLuint *) V(elts[0]); int i, j; @@ -950,7 +1004,7 @@ intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim) if (hwprim != intel->prim.primitive) { INTEL_FIREVERTICES(intel); - intelStartInlinePrimitive(intel, hwprim, LOOP_CLIPRECTS); + intel_set_prim(intel, hwprim); } } @@ -1083,15 +1137,18 @@ intel_meta_draw_poly(struct intel_context *intel, union fi *vb; GLint i; GLboolean was_locked = intel->locked; + unsigned int saved_vertex_size = intel->vertex_size; if (!was_locked) LOCK_HARDWARE(intel); + intel->vertex_size = 6; + /* All 3d primitives should be emitted with LOOP_CLIPRECTS, * otherwise the drawing origin (DR4) might not be set correctly. */ - intelStartInlinePrimitive(intel, PRIM3D_TRIFAN, LOOP_CLIPRECTS); - vb = (union fi *) intelExtendInlinePrimitive(intel, n * 6); + intel_set_prim(intel, PRIM3D_TRIFAN); + vb = (union fi *) intel_get_prim_space(intel, n); for (i = 0; i < n; i++) { vb[0].f = xy[i][0]; @@ -1105,6 +1162,8 @@ intel_meta_draw_poly(struct intel_context *intel, INTEL_FIREVERTICES(intel); + intel->vertex_size = saved_vertex_size; + if (!was_locked) UNLOCK_HARDWARE(intel); } diff --git a/src/mesa/drivers/dri/i915/intel_tris.h b/src/mesa/drivers/dri/i915/intel_tris.h index 021e5c6450..6b38cd6fbd 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.h +++ b/src/mesa/drivers/dri/i915/intel_tris.h @@ -30,7 +30,9 @@ #include "mtypes.h" - +#define INTEL_VB_SIZE (8 * 1024) +/** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */ +#define INTEL_PRIM_EMIT_SIZE (5 * 4) #define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE | \ _DD_NEW_TRI_UNFILLED | \ @@ -44,11 +46,8 @@ extern void intelInitTriFuncs(GLcontext * ctx); extern void intelChooseRenderState(GLcontext * ctx); -extern void intelStartInlinePrimitive(struct intel_context *intel, - GLuint prim, GLuint flags); -extern void intelWrapInlinePrimitive(struct intel_context *intel); - -GLuint *intelExtendInlinePrimitive(struct intel_context *intel, - GLuint dwords); +void intel_set_prim(struct intel_context *intel, uint32_t prim); +GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count); +void intel_flush_prim(struct intel_context *intel); #endif diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 579883437f..1aa9c3d711 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -182,9 +182,19 @@ struct intel_context struct { GLuint id; - GLuint primitive; - GLubyte *start_ptr; + uint32_t primitive; /**< Current hardware primitive type */ void (*flush) (struct intel_context *); + dri_bo *vb_bo; + unsigned int start_offset; /**< Byte offset of primitive sequence */ + unsigned int current_offset; /**< Byte offset of next vertex */ + unsigned int count; /**< Number of vertices in current primitive */ + /** + * Signals when a new VB should be started, regardless of remaining + * space. + * + * Used to avoid rewriting a VB that's being rendered from. + */ + GLboolean needs_new_vb; } prim; GLuint stats_wm; diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index c12ccf4ae1..96af7e1a03 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -40,6 +40,131 @@ #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) #define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +/** @{ + * 915 definitions + */ +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) +/** @} */ + +/** @{ + * 830 definitions + */ +#define S0_VB_OFFSET_MASK_830 0xffffff8 +#define S0_VB_PITCH_SHIFT_830 1 +#define S0_VB_ENABLE_830 0 +/** @} */ + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) +#define S2_TEX_COUNT_SHIFT_830 12 +#define S2_VERTEX_0_WIDTH_SHIFT_830 0 +#define S2_VERTEX_1_WIDTH_SHIFT_830 6 +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) -- cgit v1.2.3 From a42dac187973cbc17be6c59db89264cbc935ab91 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 23 Jun 2008 15:44:10 -0700 Subject: i915: Accumulate the VB into a local buffer and subdata it in. This lets GEM use pwrite, for an additional 4% or so speedup. --- src/mesa/drivers/dri/i915/i830_vtbl.c | 4 +- src/mesa/drivers/dri/i915/i915_vtbl.c | 3 +- src/mesa/drivers/dri/i915/intel_tris.c | 56 +++++++++++++++++--------- src/mesa/drivers/dri/i915/intel_tris.h | 3 +- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 3 ++ src/mesa/drivers/dri/intel/intel_context.c | 1 + src/mesa/drivers/dri/intel/intel_context.h | 9 +---- 7 files changed, 48 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 4d3ad0083a..0af5ed0b50 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -677,9 +677,6 @@ i830_new_batch(struct intel_context *intel) struct i830_context *i830 = i830_context(&intel->ctx); i830->state.emitted = 0; - /* Signal that we should put new vertices into a new vertex buffer. */ - intel->prim.needs_new_vb = GL_TRUE; - /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!intel->no_batch_wrap); } @@ -722,4 +719,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.render_prevalidate = i830_render_prevalidate; i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; i830->intel.vtbl.note_unlock = i830_note_unlock; + i830->intel.vtbl.finish_batch = intel_finish_vb; } diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 23d63fb47a..27dfc2b890 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -589,8 +589,6 @@ i915_new_batch(struct intel_context *intel) * difficulties associated with them (physical address requirements). */ i915->state.emitted = 0; - /* Signal that we should put new vertices into a new vertex buffer. */ - intel->prim.needs_new_vb = GL_TRUE; /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!intel->no_batch_wrap); @@ -633,4 +631,5 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.flush_cmd = i915_flush_cmd; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.note_unlock = i915_note_unlock; + i915->intel.vtbl.finish_batch = intel_finish_vb; } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index a1121925cb..8714dd15f3 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -77,31 +77,28 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) /* Check for space in the existing VB */ if (intel->prim.vb_bo == NULL || - intel->prim.needs_new_vb || (intel->prim.current_offset + count * intel->vertex_size * 4) > INTEL_VB_SIZE || (intel->prim.count + count) >= (1 << 16)) { /* Flush existing prim if any */ INTEL_FIREVERTICES(intel); + intel_finish_vb(intel); + /* Start a new VB */ - dri_bo_unreference(intel->prim.vb_bo); + if (intel->prim.vb == NULL) + intel->prim.vb = malloc(INTEL_VB_SIZE); intel->prim.vb_bo = dri_bo_alloc(intel->bufmgr, "vb", INTEL_VB_SIZE, 4); intel->prim.start_offset = 0; intel->prim.current_offset = 0; dri_bufmgr_check_aperture_space(intel->prim.vb_bo); - - intel->prim.needs_new_vb = GL_FALSE; - - dri_bo_map(intel->prim.vb_bo, GL_TRUE); } intel->prim.flush = intel_flush_prim; - addr = (uint32_t *)((char *)intel->prim.vb_bo->virtual + - intel->prim.current_offset); + addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset); intel->prim.current_offset += intel->vertex_size * 4 * count; intel->prim.count += count; @@ -112,6 +109,7 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) void intel_flush_prim(struct intel_context *intel) { BATCH_LOCALS; + dri_bo *vb_bo; /* Must be called after an intel_start_prim. */ assert(intel->prim.primitive != ~0); @@ -119,9 +117,13 @@ void intel_flush_prim(struct intel_context *intel) if (intel->prim.count == 0) return; - intel_wait_flips(intel); + /* Keep a reference on the BO as it may get finished as we start the + * batch emit. + */ + vb_bo = intel->prim.vb_bo; + dri_bo_reference(vb_bo); - dri_bo_unmap(intel->prim.vb_bo); + intel_wait_flips(intel); intel->vtbl.emit_state(intel); @@ -147,7 +149,7 @@ void intel_flush_prim(struct intel_context *intel) OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(0) | I1_LOAD_S(1) | 1); assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK) == 0); - OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0, + OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, intel->prim.start_offset); OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); @@ -167,7 +169,7 @@ void intel_flush_prim(struct intel_context *intel) I1_LOAD_S(0) | I1_LOAD_S(2) | 1); /* S0 */ assert((intel->prim.start_offset & !S0_VB_OFFSET_MASK_830) == 0); - OUT_RELOC(intel->prim.vb_bo, I915_GEM_DOMAIN_VERTEX, 0, + OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, intel->prim.start_offset | (intel->vertex_size << S0_VB_PITCH_SHIFT_830) | S0_VB_ENABLE_830); @@ -193,17 +195,35 @@ void intel_flush_prim(struct intel_context *intel) intel->no_batch_wrap = GL_FALSE; - /* If we're going to keep using this VB for more primitives, map it - * again. - */ - if (!intel->prim.needs_new_vb) - dri_bo_map(intel->prim.vb_bo, GL_TRUE); - intel->prim.flush = NULL; intel->prim.start_offset = intel->prim.current_offset; if (!IS_9XX(intel->intelScreen->deviceID)) intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); intel->prim.count = 0; + + dri_bo_unreference(vb_bo); +} + +/** + * Uploads the locally-accumulated VB into the buffer object. + * + * This avoids us thrashing the cachelines in and out as the buffer gets + * filled, dispatched, then reused as the hardware completes rendering from it, + * and also lets us clflush less if we dispatch with a partially-filled VB. + * + * This is called normally from get_space when we're finishing a BO, but also + * at batch flush time so that we don't try accessing the contents of a + * just-dispatched buffer. + */ +void intel_finish_vb(struct intel_context *intel) +{ + if (intel->prim.vb_bo == NULL) + return; + + dri_bo_subdata(intel->prim.vb_bo, 0, intel->prim.start_offset, + intel->prim.vb); + dri_bo_unreference(intel->prim.vb_bo); + intel->prim.vb_bo = NULL; } /*********************************************************************** diff --git a/src/mesa/drivers/dri/i915/intel_tris.h b/src/mesa/drivers/dri/i915/intel_tris.h index 6b38cd6fbd..0e08986221 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.h +++ b/src/mesa/drivers/dri/i915/intel_tris.h @@ -30,7 +30,7 @@ #include "mtypes.h" -#define INTEL_VB_SIZE (8 * 1024) +#define INTEL_VB_SIZE (32 * 1024) /** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */ #define INTEL_PRIM_EMIT_SIZE (5 * 4) @@ -49,5 +49,6 @@ extern void intelChooseRenderState(GLcontext * ctx); void intel_set_prim(struct intel_context *intel, uint32_t prim); GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count); void intel_flush_prim(struct intel_context *intel); +void intel_finish_vb(struct intel_context *intel); #endif diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 019880581a..9ad9f6a6c0 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -250,6 +250,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, * avoid that in the first place. */ batch->ptr = batch->map; + if (intel->vtbl.finish_batch) + intel->vtbl.finish_batch(intel); + /* TODO: Just pass the relocation list and dma buffer up to the * kernel. */ diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index b9e1eae982..16ddbeea9e 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -727,6 +727,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->Fallback = 0; /* don't call _swrast_Flush later */ intel_batchbuffer_free(intel->batch); + free(intel->prim.vb); if (release_texture_heaps) { /* This share group is about to go away, free our private diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 1aa9c3d711..c314b6e218 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -86,6 +86,7 @@ struct intel_context { void (*destroy) (struct intel_context * intel); void (*emit_state) (struct intel_context * intel); + void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); void (*note_fence) (struct intel_context *intel, GLuint fence); @@ -185,16 +186,10 @@ struct intel_context uint32_t primitive; /**< Current hardware primitive type */ void (*flush) (struct intel_context *); dri_bo *vb_bo; + uint8_t *vb; unsigned int start_offset; /**< Byte offset of primitive sequence */ unsigned int current_offset; /**< Byte offset of next vertex */ unsigned int count; /**< Number of vertices in current primitive */ - /** - * Signals when a new VB should be started, regardless of remaining - * space. - * - * Used to avoid rewriting a VB that's being rendered from. - */ - GLboolean needs_new_vb; } prim; GLuint stats_wm; -- cgit v1.2.3 From 93f701bc3619864ac6f067d37212e96545a57e16 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 26 Jun 2008 13:45:31 -0700 Subject: intel: Replace sprinkled intel_batchbuffer_flush with MI_FLUSH or nothing. Most of these were to ensure that caches got synchronized between 2d (or meta) rendering and later use of the target as a source, such as for texture miptree setup. Those are replaced with intel_batchbuffer_emit_mi_flush(), which just drops an MI_FLUSH. Most of the remainder were to ensure that REFERENCES_CLIPRECTS batchbuffers got flushed before the lock was dropped. Those are now replaced by automatically flushing those when dropping the lock. --- src/mesa/drivers/dri/i915/intel_pixel_read.c | 2 - src/mesa/drivers/dri/i965/brw_defines.h | 63 ----------------------- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 8 +++ src/mesa/drivers/dri/intel/intel_blit.c | 25 ++++----- src/mesa/drivers/dri/intel/intel_buffer_objects.c | 1 + src/mesa/drivers/dri/intel/intel_context.c | 3 +- src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 5 +- src/mesa/drivers/dri/intel/intel_pixel_copy.c | 6 +-- src/mesa/drivers/dri/intel/intel_pixel_draw.c | 3 +- src/mesa/drivers/dri/intel/intel_regions.c | 4 -- src/mesa/drivers/dri/intel/intel_tex_copy.c | 2 - src/mesa/drivers/dri/intel/intel_tex_image.c | 2 - src/mesa/drivers/dri/intel/intel_tex_validate.c | 14 ----- 13 files changed, 26 insertions(+), 112 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index 0b95421a25..72e1f9ed28 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -272,8 +272,6 @@ do_blit_readpixels(GLcontext * ctx, rect.x2 - rect.x1, rect.y2 - rect.y1, GL_COPY); } - - intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0fb531b1ee..2ed417824d 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -33,68 +33,6 @@ #ifndef BRW_DEFINES_H #define BRW_DEFINES_H -/* - */ -#define MI_NOOP 0x00 -#define MI_USER_INTERRUPT 0x02 -#define MI_WAIT_FOR_EVENT 0x03 -#define MI_REPORT_HEAD 0x07 -#define MI_ARB_ON_OFF 0x08 -#define MI_BATCH_BUFFER_END 0x0A -#define MI_OVERLAY_FLIP 0x11 -#define MI_LOAD_SCAN_LINES_INCL 0x12 -#define MI_LOAD_SCAN_LINES_EXCL 0x13 -#define MI_DISPLAY_BUFFER_INFO 0x14 -#define MI_SET_CONTEXT 0x18 -#define MI_STORE_DATA_IMM 0x20 -#define MI_STORE_DATA_INDEX 0x21 -#define MI_LOAD_REGISTER_IMM 0x22 -#define MI_STORE_REGISTER_MEM 0x24 -#define MI_BATCH_BUFFER_START 0x31 - -#define MI_SYNCHRONOUS_FLIP 0x0 -#define MI_ASYNCHRONOUS_FLIP 0x1 - -#define MI_BUFFER_SECURE 0x0 -#define MI_BUFFER_NONSECURE 0x1 - -#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 -#define MI_ARBITRATE_BETWEEN_INSTS 0x1 -#define MI_NO_ARBITRATION 0x3 - -#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 -#define MI_CONDITION_CODE_WAIT_0 0x1 -#define MI_CONDITION_CODE_WAIT_1 0x2 -#define MI_CONDITION_CODE_WAIT_2 0x3 -#define MI_CONDITION_CODE_WAIT_3 0x4 -#define MI_CONDITION_CODE_WAIT_4 0x5 - -#define MI_DISPLAY_PIPE_A 0x0 -#define MI_DISPLAY_PIPE_B 0x1 - -#define MI_DISPLAY_PLANE_A 0x0 -#define MI_DISPLAY_PLANE_B 0x1 -#define MI_DISPLAY_PLANE_C 0x2 - -#define MI_STANDARD_FLIP 0x0 -#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 -#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 -#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 - -#define MI_PHYSICAL_ADDRESS 0x0 -#define MI_VIRTUAL_ADDRESS 0x1 - -#define MI_BUFFER_MEMORY_MAIN 0x0 -#define MI_BUFFER_MEMORY_GTT 0x2 -#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 - -#define MI_FLIP_CONTINUE 0x0 -#define MI_FLIP_ON 0x1 -#define MI_FLIP_OFF 0x2 - -#define MI_UNTRUSTED_REGISTER_SPACE 0x0 -#define MI_TRUSTED_REGISTER_SPACE 0x1 - /* 3D state: */ #define _3DOP_3DSTATE_PIPELINED 0x0 @@ -118,7 +56,6 @@ #define _3DSTATE_LINE_STIPPLE 0x08 #define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 #define _3DCONTROL 0x00 -#define _3DPRIMITIVE 0x00 #define PIPE_CONTROL_NOWRITE 0x00 #define PIPE_CONTROL_WRITEIMMEDIATE 0x01 diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index d3c656c803..80b87b40f7 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -4,6 +4,7 @@ #include "mtypes.h" #include "dri_bufmgr.h" +#include "intel_reg.h" struct intel_context; @@ -144,4 +145,11 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define ADVANCE_BATCH() do { } while(0) +static INLINE void +intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) +{ + intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); + intel_batchbuffer_emit_dword(batch, MI_FLUSH); +} + #endif diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 80d11a01b7..84a455d1cb 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -159,14 +159,10 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, ADVANCE_BATCH(); } - /* Emit a flush so that, on systems where we don't have automatic flushing - * set (such as 965), the results all land on the screen in a timely - * fashion. + /* Flush the rendering and the batch so that the results all land on the + * screen in a timely fashion. */ - BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(MI_FLUSH); - ADVANCE_BATCH(); - + intel_batchbuffer_emit_mi_flush(intel->batch); intel_batchbuffer_flush(intel->batch); } @@ -372,10 +368,7 @@ intelEmitCopyBlit(struct intel_context *intel, src_offset + src_y * src_pitch); ADVANCE_BATCH(); } - BEGIN_BATCH(1, NO_LOOP_CLIPRECTS); - OUT_BATCH(MI_FLUSH); - ADVANCE_BATCH(); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel->batch); } @@ -556,7 +549,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } } } - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -594,7 +587,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, (8 * 4) + (3 * 4) + dwords, - NO_LOOP_CLIPRECTS ); + REFERENCES_CLIPRECTS ); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) @@ -616,7 +609,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, if (dst_tiled) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS); + BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ @@ -636,5 +629,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_data( intel->batch, src_bits, dwords * 4, - NO_LOOP_CLIPRECTS ); + REFERENCES_CLIPRECTS ); + + intel_batchbuffer_emit_mi_flush(intel->batch); } diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 4227f0c973..1923a21516 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -32,6 +32,7 @@ #include "intel_context.h" #include "intel_buffer_objects.h" +#include "intel_batchbuffer.h" #include "intel_regions.h" #include "dri_bufmgr.h" diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 6d7d6811ac..46acf79721 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1008,6 +1008,7 @@ void UNLOCK_HARDWARE( struct intel_context *intel ) * Nothing should be left in batch outside of LOCK/UNLOCK which references * cliprects. */ - assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS); + if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS) + intel_batchbuffer_flush(intel->batch); } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 81238acfe4..ce6c6d204f 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -43,7 +43,7 @@ #include "intel_buffer_objects.h" #include "intel_buffers.h" #include "intel_pixel.h" - +#include "intel_reg.h" #define FILE_DEBUG_FLAG DEBUG_PIXEL @@ -301,9 +301,8 @@ do_blit_bitmap( GLcontext *ctx, } } } - out: - intel_batchbuffer_flush(intel->batch); } +out: UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 45f72bac52..eb4f10e9d5 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -229,7 +229,7 @@ do_texture_copypixels(GLcontext * ctx, out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); @@ -345,10 +345,8 @@ do_blit_copypixels(GLcontext * ctx, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } - - out: - intel_batchbuffer_flush(intel->batch); } +out: UNLOCK_HARDWARE(intel); DBG("%s: success\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 569e992b5e..2b3445cb28 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -181,7 +181,7 @@ do_texture_drawpixels(GLcontext * ctx, srcx, srcx + width, srcy + height, srcy); out: intel->vtbl.leave_meta_state(intel); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel->batch); } UNLOCK_HARDWARE(intel); return GL_TRUE; @@ -322,7 +322,6 @@ do_blit_drawpixels(GLcontext * ctx, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY); } - intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index c7e2c551dd..ddeffc8ae4 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -376,8 +376,6 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) /* Now blit from the texture buffer to the new buffer: */ - intel_batchbuffer_flush(intel->batch); - was_locked = intel->locked; if (intel->locked) LOCK_HARDWARE(intel); @@ -390,8 +388,6 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) region->pitch, region->height, GL_COPY); - intel_batchbuffer_flush(intel->batch); - if (was_locked) UNLOCK_HARDWARE(intel); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1add7c6188..8a8eec83aa 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -151,8 +151,6 @@ do_copy_texsubimage(struct intel_context *intel, intelImage->mt->region->tiled, x, y + height, dstx, dsty, width, height, GL_COPY); /* ? */ - - intel_batchbuffer_flush(intel->batch); } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 95ddbd5920..6d57b2b7dd 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -235,8 +235,6 @@ try_pbo_upload(struct intel_context *intel, dst_stride, dst_buffer, dst_offset, GL_FALSE, 0, 0, 0, 0, width, height, GL_COPY); - - intel_batchbuffer_flush(intel->batch); } UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index d260a721d9..b5803fb813 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -124,13 +124,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) struct intel_texture_object *intelObj = intel_texture_object(tObj); int comp_byte = 0; int cpp; - GLuint face, i; GLuint nr_faces = 0; struct intel_texture_image *firstImage; - GLboolean need_flush = GL_FALSE; - /* We know/require this is true by now: */ assert(intelObj->base._Complete); @@ -223,21 +220,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) */ if (intelObj->mt != intelImage->mt) { copy_image_data_to_tree(intel, intelObj, intelImage); - need_flush = GL_TRUE; } } } -#ifdef I915 - /* XXX: what is this flush about? - * On 965, it causes a batch flush in the middle of the state relocation - * emits, which means that the eventual rendering doesn't have all of the - * required relocations in place. - */ - if (need_flush) - intel_batchbuffer_flush(intel->batch); -#endif - return GL_TRUE; } -- cgit v1.2.3 From f059a3302260075e9cfd35649dc3877726291d8d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 26 Jun 2008 15:34:27 -0700 Subject: intel: Fix locking when doing intel_region_cow(). This was broken in the merge of 965 blit support. It tried to lock only when things were already locked. --- src/mesa/drivers/dri/intel/intel_regions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index ddeffc8ae4..5d23c72504 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -377,7 +377,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) */ was_locked = intel->locked; - if (intel->locked) + if (!was_locked) LOCK_HARDWARE(intel); intelEmitCopyBlit(intel, @@ -388,7 +388,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) region->pitch, region->height, GL_COPY); - if (was_locked) + if (!was_locked) UNLOCK_HARDWARE(intel); } -- cgit v1.2.3 From e74f54793e45dd2e36474f6fc527456647f32efd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 1 Jul 2008 15:09:24 -0700 Subject: intel-gem: Move bit 6 x tiling swizzle to a driconf option, and add new mode. It turns out that it's not just deviceID dependent, and there's some additional undefined factor that determines the bit 6 swizzling. It's now controllable with swizzle_mode=[012] until we get a response on how to automatically detect. --- src/mesa/drivers/dri/intel/intel_context.c | 3 ++ src/mesa/drivers/dri/intel/intel_context.h | 1 + src/mesa/drivers/dri/intel/intel_screen.c | 9 +++++- src/mesa/drivers/dri/intel/intel_span.c | 51 ++++++++++++++++++++---------- 4 files changed, 46 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 46acf79721..33b8843e33 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -697,6 +697,9 @@ intelInitContext(struct intel_context *intel, intel->no_rast = 1; } + intel->tiling_swizzle_mode = driQueryOptioni(&intel->optionCache, + "swizzle_mode"); + /* Disable all hardware rendering (skip emitting batches and fences/waits * to the kernel) */ diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index f1116d2747..6ed9a377e4 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -266,6 +266,7 @@ struct intel_context GLuint lastStamp; GLboolean no_hw; + int tiling_swizzle_mode; /** * Configuration cache diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 8fd503ee8b..6597dbffed 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -69,13 +69,20 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) DRI_CONF_ALLOW_LARGE_TEXTURES(2) + DRI_CONF_OPT_BEGIN_V(swizzle_mode, enum, 0, "0:2") + DRI_CONF_DESC_BEGIN(en, "Tiling swizzle mode for software fallbacks") + DRI_CONF_ENUM(0, "No swizzling") + DRI_CONF_ENUM(1, "addr[6] = addr[6] ^ addr[9]") + DRI_CONF_ENUM(2, "addr[6] = addr[6] ^ addr[9] ^ addr[10]") + DRI_CONF_DESC_END + DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG DRI_CONF_NO_RAST(false) DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 6; +const GLuint __driNConfigOptions = 7; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index c6778b16ff..8d7d913ca9 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -106,29 +106,46 @@ static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont x_tile_off = xbyte & 0x1ff; y_tile_off = y & 7; -#ifndef I915 - /* The documentation says that X tile layout is arranged in 8 512-byte - * lines of pixel data. However, that doesn't appear to be the case - * on GM965, tested by drawing a 128x8 quad in no_rast mode. For lines - * 1,2,4, and 7 of each tile, each consecutive pair of 64-byte spans - * has the locations of those spans swapped. + x_tile_number = xbyte >> 9; + y_tile_number = y >> 3; + + tile_off = (y_tile_off << 9) + x_tile_off; + + /* bit swizzling tricks your parents never told you about: + * + * The specs say that the X tiling layout is just 8 512-byte rows + * packed into a page. It turns out that there's some additional + * swizzling of bit 6 to reduce cache aliasing issues. Experimental + * results below: + * + * line bit GM965 945G/Q965 + * 9 10 11 + * 0 0 0 0 0 0 + * 1 0 1 0 1 1 + * 2 1 0 0 1 1 + * 3 1 1 0 0 0 + * 4 0 0 1 1 0 + * 5 0 1 1 0 1 + * 6 1 0 1 0 1 + * 7 1 1 1 1 0 + * + * So we see that the GM965 is bit 6 ^ 9 ^ 10 ^ 11, while other + * parts were just 6 ^ 9 ^ 10. However, some systems, including a + * GM965 we've seen, don't perform the swizzling at all. Information + * on how to detect it through register reads is expected soon. */ - switch (y_tile_off) { + switch (intel->tiling_swizzle_mode) { + case 0: + break; case 1: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); + break; case 2: - case 4: - case 7: - x_tile_off ^= 64; - break; - default: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ + ((tile_off >> 5) & 64); break; } -#endif - - x_tile_number = xbyte >> 9; - y_tile_number = y >> 3; - tile_off = (y_tile_off << 9) + x_tile_off; tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; #if 0 -- cgit v1.2.3 From 19f585a3cf65887e249d630fe43e83e7e7618dfa Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 2 Jul 2008 09:10:21 -0700 Subject: intel-gem: Fix Y-tiling span setup. The boolean that the server gives us for whether the region is tiled was getting used as the enum for what tiling mode. Instead, guess the correct tiling in screen setup. Also, fix the Y-tiling pitch setup. The pitch to the next tile in Y is 32 scanlines, not 8. --- src/mesa/drivers/dri/intel/intel_fbo.c | 13 ++++++++--- src/mesa/drivers/dri/intel/intel_fbo.h | 7 +++--- src/mesa/drivers/dri/intel/intel_screen.c | 39 ++++++++++++++++++++----------- src/mesa/drivers/dri/intel/intel_screen.h | 6 +++++ src/mesa/drivers/dri/intel/intel_span.c | 7 +++--- src/mesa/drivers/dri/intel/intel_span.h | 7 ++---- 6 files changed, 52 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index bc0b579429..3a3ce68c59 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -376,7 +376,8 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, * not a user-created renderbuffer. */ struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat, int tiling) +intel_create_renderbuffer(intelScreenPrivate *intelScreen, + GLenum intFormat, enum tiling_mode tiling) { GET_CURRENT_CONTEXT(ctx); @@ -449,8 +450,14 @@ intel_create_renderbuffer(GLenum intFormat, int tiling) irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_alloc_window_storage; irb->Base.GetPointer = intel_get_pointer; - /* This sets the Get/PutRow/Value functions */ - intel_set_span_functions(&irb->Base, tiling); + /* This sets the Get/PutRow/Value functions. In classic mode, all access + * is through the aperture and will be swizzled by the fence registers, so + * we don't need the span functions to perfom tile swizzling + */ + if (intelScreen->ttm) + intel_set_span_functions(&irb->Base, tiling); + else + intel_set_span_functions(&irb->Base, INTEL_TILE_NONE); return irb; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 9e085a1992..23af593960 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -28,9 +28,9 @@ #ifndef INTEL_FBO_H #define INTEL_FBO_H +#include "intel_screen.h" struct intel_context; -struct intel_region; /** * Intel framebuffer, derived from gl_framebuffer. @@ -72,7 +72,7 @@ struct intel_renderbuffer struct intel_region *region; void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ - int tiling; + enum tiling_mode tiling; GLboolean RenderToTexture; /* RTT? */ GLuint PairedDepth; /**< only used if this is a depth renderbuffer */ @@ -91,7 +91,8 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb, struct intel_region *region); extern struct intel_renderbuffer * -intel_create_renderbuffer(GLenum intFormat, int tiling); +intel_create_renderbuffer(intelScreenPrivate *intelScreen, + GLenum intFormat, enum tiling_mode tiling); extern void intel_fbo_init(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 6597dbffed..9e4f48fbd7 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -528,6 +528,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, GLboolean swStencil = (mesaVis->stencilBits > 0 && mesaVis->depthBits != 24); GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8); + enum tiling_mode tiling; struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer); @@ -537,34 +538,46 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); /* setup the hardware-based renderbuffers */ + /* We get only a boolean value from the DDX for whether tiling is + * enabled, so we have to guess when it's Y and not X (965 depth). + */ { - intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat, - screen->ttm ? screen->front.tiled : INTEL_TILE_NONE); + tiling = screen->front.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; + intel_fb->color_rb[0] = intel_create_renderbuffer(screen, + rgbFormat, tiling); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, &intel_fb->color_rb[0]->Base); } if (mesaVis->doubleBufferMode) { - intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat, - screen->ttm ? screen->back.tiled : INTEL_TILE_NONE); + tiling = screen->back.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; + intel_fb->color_rb[1] = intel_create_renderbuffer(screen, + rgbFormat, tiling); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); if (screen->third.handle) { struct gl_renderbuffer *tmp_rb = NULL; - - intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat, - screen->ttm ? screen->third.tiled : INTEL_TILE_NONE); + tiling = screen->third.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; + intel_fb->color_rb[2] = intel_create_renderbuffer(screen, + rgbFormat, + tiling); _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base); } } +#ifdef I915 + tiling = screen->depth.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; +#else + tiling = screen->depth.tiled ? INTEL_TILE_Y : INTEL_TILE_NONE; +#endif if (mesaVis->depthBits == 24) { if (mesaVis->stencilBits == 8) { /* combined depth/stencil buffer */ struct intel_renderbuffer *depthStencilRb - = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, - screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); + = intel_create_renderbuffer(screen, + GL_DEPTH24_STENCIL8_EXT, tiling); /* note: bind RB to two attachment points */ _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthStencilRb->Base); @@ -572,8 +585,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, &depthStencilRb->Base); } else { struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT24, - screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); + = intel_create_renderbuffer(screen, + GL_DEPTH_COMPONENT24, tiling); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } @@ -581,8 +594,8 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(GL_DEPTH_COMPONENT16, - screen->ttm ? screen->depth.tiled : INTEL_TILE_NONE); + = intel_create_renderbuffer(screen, + GL_DEPTH_COMPONENT16, tiling); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 9a73b13951..648bf61240 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -33,6 +33,12 @@ #include "i915_drm.h" #include "xmlconfig.h" +enum tiling_mode { + INTEL_TILE_NONE, + INTEL_TILE_X, + INTEL_TILE_Y +}; + /* XXX: change name or eliminate to avoid conflict with "struct * intel_region"!!! */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 8d7d913ca9..6138b262f4 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -168,7 +168,7 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont int x_tile_number, y_tile_number; int tile_off, tile_base; - tile_stride = (irb->pfPitch * irb->region->cpp) << 3; + tile_stride = (irb->pfPitch * irb->region->cpp) << 5; x += intel->drawX; y += intel->drawY; @@ -181,7 +181,8 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont x_tile_number = xbyte >> 7; y_tile_number = y >> 5; - tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + (x_tile_off & 0xf); + tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + + (x_tile_off & 0xf); tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; return buf + tile_base + tile_off; @@ -670,7 +671,7 @@ intelInitSpanFuncs(GLcontext * ctx) * These are used for the software fallbacks. */ void -intel_set_span_functions(struct gl_renderbuffer *rb, int tiling) +intel_set_span_functions(struct gl_renderbuffer *rb, enum tiling_mode tiling) { if (rb->_ActualFormat == GL_RGB5) { /* 565 RGB */ diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index c56e5e1611..1b47c2829c 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -33,10 +33,7 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); -extern void intel_set_span_functions(struct gl_renderbuffer *rb, int tiling); - -#define INTEL_TILE_NONE 0 -#define INTEL_TILE_X 1 -#define INTEL_TILE_Y 2 +extern void intel_set_span_functions(struct gl_renderbuffer *rb, + enum tiling_mode tiling); #endif -- cgit v1.2.3 From 4b3ed4d2d16811a624857519e95303017f4160b5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 2 Jul 2008 10:21:44 -0700 Subject: intel-gem: Fix y-tile swizzling for our G965 with swizzle_mode=1. Apparently in Y mode we get bit 6 ^ bit 9. The reflect demo in 'd' mode now displays correctly. --- src/mesa/drivers/dri/intel/intel_span.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 6138b262f4..7b079afa73 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -183,6 +183,16 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + (x_tile_off & 0xf); + + switch (intel->tiling_swizzle_mode) { + case 0: + break; + case 1: + tile_off ^= (tile_off >> 3) & 64; + break; + case 2: + break; + } tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; return buf + tile_base + tile_off; -- cgit v1.2.3 From a995bdced20a55759dffd901c10ec5fb251191cf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 2 Jul 2008 11:16:30 -0700 Subject: intel-gem: Emit an MI_FLUSH at glFlush() so frontbuffer rendering is flushed. We have something similar in the X Server that covers X Server rendering, this is the equivalent here for rendering to the front buffer. If we cared about avoiding this at glFlush time, we could only do this when some actual frontbuffer rendering had occurred. Bug #16392. --- src/mesa/drivers/dri/intel/intel_context.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 33b8843e33..fa0b4c5618 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -365,6 +365,12 @@ intelFlush(GLcontext * ctx) if (!IS_965(intel->intelScreen->deviceID)) INTEL_FIREVERTICES(intel); + /* Emit a flush so that any frontbuffer rendering that might have occurred + * lands onscreen in a timely manner, even if the X Server doesn't trigger + * a flush for us. + */ + intel_batchbuffer_emit_mi_flush(intel->batch); + if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); } -- cgit v1.2.3 From def6e4f420feed4a07402a8da84e7822f6ddba99 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 2 Jul 2008 11:49:10 -0700 Subject: intel: span rendering requires just a flush before starting, not finish. The dri_bo_map()s that follow will take care of idling the hardware as needed. --- src/mesa/drivers/dri/intel/intel_span.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 7b079afa73..b1392f794e 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -612,7 +612,7 @@ intelSpanRenderStart(GLcontext * ctx) struct intel_context *intel = intel_context(ctx); GLuint i; - intelFinish(&intel->ctx); + intelFlush(&intel->ctx); LOCK_HARDWARE(intel); #if 0 -- cgit v1.2.3 From 2e841880cfc1006a2818d4a8bfefd21136dc39a9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 11 Jul 2008 14:16:36 -0700 Subject: drm-gem: Use new GEM ioctls for tiling state, and support new swizzle modes. --- src/mesa/drivers/dri/i965/brw_misc_state.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 37 ++++-- src/mesa/drivers/dri/intel/intel_blit.c | 24 ++-- src/mesa/drivers/dri/intel/intel_blit.h | 8 +- src/mesa/drivers/dri/intel/intel_context.c | 3 - src/mesa/drivers/dri/intel/intel_context.h | 1 - src/mesa/drivers/dri/intel/intel_fbo.c | 21 +--- src/mesa/drivers/dri/intel/intel_fbo.h | 4 +- src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 2 +- src/mesa/drivers/dri/intel/intel_pixel_copy.c | 4 +- src/mesa/drivers/dri/intel/intel_pixel_draw.c | 2 +- src/mesa/drivers/dri/intel/intel_regions.c | 129 ++++++++++++++----- src/mesa/drivers/dri/intel/intel_regions.h | 13 +- src/mesa/drivers/dri/intel/intel_screen.c | 133 ++------------------ src/mesa/drivers/dri/intel/intel_screen.h | 6 - src/mesa/drivers/dri/intel/intel_span.c | 154 ++++++++++++----------- src/mesa/drivers/dri/intel/intel_span.h | 3 - src/mesa/drivers/dri/intel/intel_tex_copy.c | 2 +- 18 files changed, 250 insertions(+), 298 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 9d925682c2..bd28235281 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -232,7 +232,7 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(((region->pitch * region->cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | - (region->tiled << 27) | + ((region->tiling != I915_TILING_NONE) << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(region->buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index a7da5e643c..761a5df33f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -154,9 +154,28 @@ struct brw_wm_surface_key { GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; - GLboolean tiled; + uint32_t tiling; }; +static void +brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + surf->ss3.tiled_surface = 0; + surf->ss3.tile_walk = 0; + break; + case I915_TILING_X: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } +} + static dri_bo * brw_create_texture_surface( struct brw_context *brw, struct brw_wm_surface_key *key ) @@ -179,9 +198,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss2.mip_count = key->last_level - key->first_level; surf.ss2.width = key->width - 1; surf.ss2.height = key->height - 1; - - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = key->tiled; + brw_set_surface_tiling(&surf, key->tiling); surf.ss3.pitch = (key->pitch * key->cpp) - 1; surf.ss3.depth = key->depth - 1; @@ -234,7 +251,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.pitch = intelObj->mt->pitch; key.cpp = intelObj->mt->cpp; key.depth = firstImage->Depth; - key.tiled = intelObj->mt->region->tiled; + key.tiling = intelObj->mt->region->tiling; ret |= dri_bufmgr_check_aperture_space(key.bo); @@ -267,7 +284,8 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, unsigned int surface_format; unsigned int width, height, cpp; GLubyte color_mask[4]; - GLboolean tiled, color_blend; + GLboolean color_blend; + uint32_t tiling; } key; memset(&key, 0, sizeof(key)); @@ -280,7 +298,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; else key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - key.tiled = region->tiled; + key.tiling = region->tiling; key.width = region->pitch; /* XXX: not really! */ key.height = region->height; key.cpp = region->cpp; @@ -289,7 +307,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiled = 0; + key.tiling = 0; key.width = 1; key.height = 1; key.cpp = 4; @@ -319,8 +337,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = key.tiled; + brw_set_surface_tiling(&surf, key.tiling); surf.ss3.pitch = (key.width * key.cpp) - 1; /* _NEW_COLOR */ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 84a455d1cb..2a05d29124 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -106,11 +106,11 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, } #ifndef I915 - if (src->tiled) { + if (src->tiling != I915_TILING_NONE) { CMD |= XY_SRC_TILED; src_pitch /= 4; } - if (dst->tiled) { + if (dst->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; dst_pitch /= 4; } @@ -178,7 +178,7 @@ intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color) @@ -203,7 +203,7 @@ intelEmitFillBlit(struct intel_context *intel, return; } #ifndef I915 - if (dst_tiled) { + if (dst_tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; dst_pitch /= 4; } @@ -259,11 +259,11 @@ intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - GLboolean src_tiled, + uint32_t src_tiling, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, @@ -309,11 +309,11 @@ intelEmitCopyBlit(struct intel_context *intel, } #ifndef I915 - if (dst_tiled) { + if (dst_tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; dst_pitch /= 4; } - if (src_tiled) { + if (src_tiling != I915_TILING_NONE) { CMD |= XY_SRC_TILED; src_pitch /= 4; } @@ -512,7 +512,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } #ifndef I915 - if (irb_region->tiled) { + if (irb_region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -563,7 +563,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op) @@ -593,7 +593,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, if (cpp == 4) opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; #ifndef I915 - if (dst_tiled) { + if (dst_tiling != I915_TILING_NONE) { opcode |= XY_DST_TILED; dst_pitch /= 4; } @@ -606,7 +606,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, br13 |= BR13_8888; blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ - if (dst_tiled) + if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS); diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index fc0620caba..0881cc4fdc 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -42,11 +42,11 @@ extern void intelEmitCopyBlit(struct intel_context *intel, GLshort src_pitch, dri_bo *src_buffer, GLuint src_offset, - GLboolean src_tiled, + uint32_t src_tiling, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort srcx, GLshort srcy, GLshort dstx, GLshort dsty, GLshort w, GLshort h, @@ -57,7 +57,7 @@ extern void intelEmitFillBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLuint color); @@ -69,7 +69,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLshort dst_pitch, dri_bo *dst_buffer, GLuint dst_offset, - GLboolean dst_tiled, + uint32_t dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op); diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index fa0b4c5618..7e3f370ad0 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -703,9 +703,6 @@ intelInitContext(struct intel_context *intel, intel->no_rast = 1; } - intel->tiling_swizzle_mode = driQueryOptioni(&intel->optionCache, - "swizzle_mode"); - /* Disable all hardware rendering (skip emitting batches and fences/waits * to the kernel) */ diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 6ed9a377e4..f1116d2747 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -266,7 +266,6 @@ struct intel_context GLuint lastStamp; GLboolean no_hw; - int tiling_swizzle_mode; /** * Configuration cache diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 3a3ce68c59..7663393fba 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -294,10 +294,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->Width = width; rb->Height = height; - /* This sets the Get/PutRow/Value functions */ - /* XXX can we choose a different tile here? */ - intel_set_span_functions(&irb->Base, INTEL_TILE_NONE); - return GL_TRUE; } } @@ -376,8 +372,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, * not a user-created renderbuffer. */ struct intel_renderbuffer * -intel_create_renderbuffer(intelScreenPrivate *intelScreen, - GLenum intFormat, enum tiling_mode tiling) +intel_create_renderbuffer(GLenum intFormat) { GET_CURRENT_CONTEXT(ctx); @@ -444,20 +439,10 @@ intel_create_renderbuffer(intelScreenPrivate *intelScreen, irb->Base.InternalFormat = intFormat; - irb->tiling = tiling; - /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_alloc_window_storage; irb->Base.GetPointer = intel_get_pointer; - /* This sets the Get/PutRow/Value functions. In classic mode, all access - * is through the aperture and will be swizzled by the fence registers, so - * we don't need the span functions to perfom tile swizzling - */ - if (intelScreen->ttm) - intel_set_span_functions(&irb->Base, tiling); - else - intel_set_span_functions(&irb->Base, INTEL_TILE_NONE); return irb; } @@ -568,7 +553,6 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; - intel_set_span_functions(&irb->Base, irb->tiling); irb->RenderToTexture = GL_TRUE; @@ -596,9 +580,6 @@ intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) _mesa_init_renderbuffer(&irb->Base, name); irb->Base.ClassID = INTEL_RB_CLASS; - /* XXX can we fix this? */ - irb->tiling = INTEL_TILE_NONE; - if (!intel_update_wrapper(ctx, irb, texImage)) { _mesa_free(irb); return NULL; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 23af593960..5fe0fd8abf 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -72,7 +72,6 @@ struct intel_renderbuffer struct intel_region *region; void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ - enum tiling_mode tiling; GLboolean RenderToTexture; /* RTT? */ GLuint PairedDepth; /**< only used if this is a depth renderbuffer */ @@ -91,8 +90,7 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *irb, struct intel_region *region); extern struct intel_renderbuffer * -intel_create_renderbuffer(intelScreenPrivate *intelScreen, - GLenum intFormat, enum tiling_mode tiling); +intel_create_renderbuffer(GLenum intFormat); extern void intel_fbo_init(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index ce6c6d204f..7e0d20e681 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -293,7 +293,7 @@ do_blit_bitmap( GLcontext *ctx, dst->pitch, dst->buffer, 0, - dst->tiled, + dst->tiling, rect.x1 + px, rect.y2 - (py + h), w, h, diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index eb4f10e9d5..3093ccf7c6 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -337,8 +337,8 @@ do_blit_copypixels(GLcontext * ctx, continue; intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, 0, src->tiled, - dst->pitch, dst->buffer, 0, dst->tiled, + src->pitch, src->buffer, 0, src->tiling, + dst->pitch, dst->buffer, 0, dst->tiling, clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */ clip_x, clip_y, /* dstx, dsty */ clip_w, clip_h, diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 2b3445cb28..5675084da5 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -314,7 +314,7 @@ do_blit_drawpixels(GLcontext * ctx, intelEmitCopyBlit(intel, dest->cpp, rowLength, src_buffer, src_offset, GL_FALSE, - dest->pitch, dest->buffer, 0, dest->tiled, + dest->pitch, dest->buffer, 0, dest->tiling, rect.x1 - dest_rect.x1, rect.y2 - dest_rect.y2, rect.x1, diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 5d23c72504..91b835d1aa 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -39,6 +39,9 @@ * last moment. */ +#include +#include + #include "intel_context.h" #include "intel_regions.h" #include "intel_blit.h" @@ -46,6 +49,7 @@ #include "dri_bufmgr.h" #include "intel_bufmgr.h" #include "intel_batchbuffer.h" +#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_REGION @@ -76,10 +80,34 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region) } } +static int +intel_set_region_tiling_gem(struct intel_context *intel, + struct intel_region *region, + uint32_t bo_handle) +{ + struct drm_i915_gem_get_tiling get_tiling; + int ret; + + memset(&get_tiling, 0, sizeof(get_tiling)); + + get_tiling.handle = bo_handle; + ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); + if (ret != 0) { + fprintf(stderr, "Failed to get tiling state for region: %s\n", + strerror(errno)); + return ret; + } + + region->tiling = get_tiling.tiling_mode; + region->bit_6_swizzle = get_tiling.swizzle_mode; + + return 0; +} + static struct intel_region * intel_region_alloc_internal(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint tiled, dri_bo *buffer) + dri_bo *buffer) { struct intel_region *region; @@ -93,9 +121,12 @@ intel_region_alloc_internal(struct intel_context *intel, region->pitch = pitch; region->height = height; /* needed? */ region->refcount = 1; - region->tiled = tiled; region->buffer = buffer; + /* Default to no tiling */ + region->tiling = I915_TILING_NONE; + region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + return region; } @@ -108,20 +139,26 @@ intel_region_alloc(struct intel_context *intel, buffer = dri_bo_alloc(intel->bufmgr, "region", pitch * cpp * height, 64); - return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer); + return intel_region_alloc_internal(intel, cpp, pitch, height, buffer); } struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint tiled, GLuint handle) + GLuint handle) { + struct intel_region *region; dri_bo *buffer; - buffer = intel_bo_gem_create_from_name(intel->bufmgr, "region", handle); + buffer = intel_bo_gem_create_from_name(intel->bufmgr, "dri2 region", handle); + + region = intel_region_alloc_internal(intel, cpp, pitch, height, buffer); + if (region == NULL) + return region; + + intel_set_region_tiling_gem(intel, region, handle); - return intel_region_alloc_internal(intel, - cpp, pitch, height, tiled, buffer); + return region; } void @@ -135,26 +172,34 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src) } void -intel_region_release(struct intel_region **region) +intel_region_release(struct intel_region **region_handle) { - if (!*region) + struct intel_region *region = *region_handle; + + if (region == NULL) return; - DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1); + DBG("%s %d\n", __FUNCTION__, region->refcount - 1); - ASSERT((*region)->refcount > 0); - (*region)->refcount--; + ASSERT(region->refcount > 0); + region->refcount--; - if ((*region)->refcount == 0) { - assert((*region)->map_refcount == 0); + if (region->refcount == 0) { + assert(region->map_refcount == 0); - if ((*region)->pbo) - (*region)->pbo->region = NULL; - (*region)->pbo = NULL; - dri_bo_unreference((*region)->buffer); - free(*region); + if (region->pbo) + region->pbo->region = NULL; + region->pbo = NULL; + dri_bo_unreference(region->buffer); + + if (region->classic_map != NULL) { + drmUnmap(region->classic_map, + region->pitch * region->cpp * region->height); + } + + free(region); } - *region = NULL; + *region_handle = NULL; } /* @@ -269,8 +314,8 @@ intel_region_copy(struct intel_context *intel, intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, src_offset, src->tiled, - dst->pitch, dst->buffer, dst_offset, dst->tiled, + src->pitch, src->buffer, src_offset, src->tiling, + dst->pitch, dst->buffer, dst_offset, dst->tiling, srcx, srcy, dstx, dsty, width, height, GL_COPY); } @@ -300,7 +345,7 @@ intel_region_fill(struct intel_context *intel, intelEmitFillBlit(intel, dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiled, + dst->pitch, dst->buffer, dst_offset, dst->tiling, dstx, dsty, width, height, color); } @@ -382,8 +427,8 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) intelEmitCopyBlit(intel, region->cpp, - region->pitch, region->buffer, 0, region->tiled, - region->pitch, pbo->buffer, 0, region->tiled, + region->pitch, region->buffer, 0, region->tiling, + region->pitch, pbo->buffer, 0, region->tiling, 0, 0, 0, 0, region->pitch, region->height, GL_COPY); @@ -414,6 +459,7 @@ intel_recreate_static(struct intel_context *intel, GLuint mem_type) { intelScreenPrivate *intelScreen = intel->intelScreen; + int ret; if (region == NULL) { region = calloc(sizeof(*region), 1); @@ -426,20 +472,45 @@ intel_recreate_static(struct intel_context *intel, region->cpp = intel->ctx.Visual.rgbBits / 8; region->pitch = intelScreen->pitch; region->height = intelScreen->height; /* needed? */ - region->tiled = region_desc->tiled; if (intel->ttm) { assert(region_desc->bo_handle != -1); region->buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, region_desc->bo_handle); + + intel_set_region_tiling_gem(intel, region, region_desc->bo_handle); } else { + ret = drmMap(intel->driFd, region_desc->handle, + region->pitch * region->cpp * region->height, + ®ion->classic_map); + if (ret != 0) { + fprintf(stderr, "Failed to drmMap %s buffer\n", name); + free(region); + return NULL; + } + region->buffer = intel_bo_fake_alloc_static(intel->bufmgr, name, region_desc->offset, - intelScreen->pitch * - intelScreen->height, - region_desc->map); + region->pitch * region->cpp * + region->height, + region->classic_map); + + /* The sarea just gives us a boolean for whether it's tiled or not, + * instead of which tiling mode it is. Guess. + */ + if (region_desc->tiled) { + if (IS_965(intel->intelScreen->deviceID) && + region_desc == &intelScreen->depth) + region->tiling = I915_TILING_Y; + else + region->tiling = I915_TILING_X; + } else { + region->tiling = I915_TILING_NONE; + } + + region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; } assert(region->buffer != NULL); diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index 229f79aeba..e5f19fbb45 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -28,6 +28,12 @@ #ifndef INTEL_REGIONS_H #define INTEL_REGIONS_H +/** @file intel_regions.h + * + * Structure definitions and prototypes for intel_region handling, which is + * the basic structure for rectangular collections of pixels stored in a dri_bo. + */ + #include "mtypes.h" #include "dri_bufmgr.h" @@ -53,8 +59,9 @@ struct intel_region GLuint map_refcount; /**< Reference count for mapping */ GLuint draw_offset; /**< Offset of drawing address within the region */ - GLboolean tiled; /**< True if the region is X or Y-tiled. Used on 965. */ - + uint32_t tiling; /**< Which tiling mode the region is in */ + uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */ + drmAddress classic_map; /**< drmMap of the region when not in GEM mode */ struct intel_buffer_object *pbo; /* zero-copy uploads */ }; @@ -69,7 +76,7 @@ struct intel_region *intel_region_alloc(struct intel_context *intel, struct intel_region * intel_region_alloc_for_handle(struct intel_context *intel, GLuint cpp, GLuint pitch, GLuint height, - GLuint tiled, unsigned int handle); + unsigned int handle); void intel_region_reference(struct intel_region **dst, struct intel_region *src); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 9e4f48fbd7..36dce171c6 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -69,20 +69,13 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) DRI_CONF_ALLOW_LARGE_TEXTURES(2) - DRI_CONF_OPT_BEGIN_V(swizzle_mode, enum, 0, "0:2") - DRI_CONF_DESC_BEGIN(en, "Tiling swizzle mode for software fallbacks") - DRI_CONF_ENUM(0, "No swizzling") - DRI_CONF_ENUM(1, "addr[6] = addr[6] ^ addr[9]") - DRI_CONF_ENUM(2, "addr[6] = addr[6] ^ addr[9] ^ addr[10]") - DRI_CONF_DESC_END - DRI_CONF_OPT_END DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG DRI_CONF_NO_RAST(false) DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 7; +const GLuint __driNConfigOptions = 6; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; @@ -97,51 +90,6 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) { intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private; - if (intelScreen->front.handle) { - if (drmMap(sPriv->fd, - intelScreen->front.handle, - intelScreen->front.size, - (drmAddress *) & intelScreen->front.map) != 0) { - _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); - return GL_FALSE; - } - } - else { - _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!"); - } - - if (0) - _mesa_printf("Back 0x%08x ", intelScreen->back.handle); - if (drmMap(sPriv->fd, - intelScreen->back.handle, - intelScreen->back.size, - (drmAddress *) & intelScreen->back.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (intelScreen->third.handle) { - if (0) - _mesa_printf("Third 0x%08x ", intelScreen->third.handle); - if (drmMap(sPriv->fd, - intelScreen->third.handle, - intelScreen->third.size, - (drmAddress *) & intelScreen->third.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - } - - if (0) - _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle); - if (drmMap(sPriv->fd, - intelScreen->depth.handle, - intelScreen->depth.size, - (drmAddress *) & intelScreen->depth.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - if (0) _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle); if (intelScreen->tex.size != 0) { @@ -154,50 +102,15 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv) } } - if (0) - printf("Mappings: front: %p back: %p third: %p depth: %p tex: %p\n", - intelScreen->front.map, - intelScreen->back.map, intelScreen->third.map, - intelScreen->depth.map, intelScreen->tex.map); return GL_TRUE; } void intelUnmapScreenRegions(intelScreenPrivate * intelScreen) { -#define REALLY_UNMAP 1 - if (intelScreen->front.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) - printf("drmUnmap front failed!\n"); -#endif - intelScreen->front.map = NULL; - } - if (intelScreen->back.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) - printf("drmUnmap back failed!\n"); -#endif - intelScreen->back.map = NULL; - } - if (intelScreen->third.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0) - printf("drmUnmap third failed!\n"); -#endif - intelScreen->third.map = NULL; - } - if (intelScreen->depth.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->depth.map, intelScreen->depth.size); - intelScreen->depth.map = NULL; -#endif - } if (intelScreen->tex.map) { -#if REALLY_UNMAP drmUnmap(intelScreen->tex.map, intelScreen->tex.size); intelScreen->tex.map = NULL; -#endif } } @@ -341,8 +254,6 @@ intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, * attached. */ } -#define BUFFER_FLAG_TILED 0x0100 - /** * DRI2 entrypoint */ @@ -355,7 +266,6 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, struct intel_renderbuffer *rb; struct intel_region *region; struct intel_context *intel = pcp->driverPrivate; - GLuint tiled; switch (ba->buffer.attachment) { case DRI_DRAWABLE_BUFFER_FRONT_LEFT: @@ -389,10 +299,9 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, return; #endif - tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0; region = intel_region_alloc_for_handle(intel, ba->buffer.cpp, ba->buffer.pitch / ba->buffer.cpp, - dPriv->h, tiled, + dPriv->h, ba->buffer.handle); intel_renderbuffer_set_region(rb, region); @@ -528,7 +437,6 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, GLboolean swStencil = (mesaVis->stencilBits > 0 && mesaVis->depthBits != 24); GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8); - enum tiling_mode tiling; struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer); @@ -538,46 +446,29 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis); /* setup the hardware-based renderbuffers */ - /* We get only a boolean value from the DDX for whether tiling is - * enabled, so we have to guess when it's Y and not X (965 depth). - */ - { - tiling = screen->front.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; - intel_fb->color_rb[0] = intel_create_renderbuffer(screen, - rgbFormat, tiling); - _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, - &intel_fb->color_rb[0]->Base); - } + intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat); + _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT, + &intel_fb->color_rb[0]->Base); if (mesaVis->doubleBufferMode) { - tiling = screen->back.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; - intel_fb->color_rb[1] = intel_create_renderbuffer(screen, - rgbFormat, tiling); + intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); if (screen->third.handle) { struct gl_renderbuffer *tmp_rb = NULL; - tiling = screen->third.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; - intel_fb->color_rb[2] = intel_create_renderbuffer(screen, - rgbFormat, - tiling); + + intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat); _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base); } } -#ifdef I915 - tiling = screen->depth.tiled ? INTEL_TILE_X : INTEL_TILE_NONE; -#else - tiling = screen->depth.tiled ? INTEL_TILE_Y : INTEL_TILE_NONE; -#endif if (mesaVis->depthBits == 24) { if (mesaVis->stencilBits == 8) { /* combined depth/stencil buffer */ struct intel_renderbuffer *depthStencilRb - = intel_create_renderbuffer(screen, - GL_DEPTH24_STENCIL8_EXT, tiling); + = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT); /* note: bind RB to two attachment points */ _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthStencilRb->Base); @@ -585,8 +476,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, &depthStencilRb->Base); } else { struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(screen, - GL_DEPTH_COMPONENT24, tiling); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT24); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } @@ -594,8 +484,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, else if (mesaVis->depthBits == 16) { /* just 16-bit depth buffer, no hw stencil */ struct intel_renderbuffer *depthRb - = intel_create_renderbuffer(screen, - GL_DEPTH_COMPONENT16, tiling); + = intel_create_renderbuffer(GL_DEPTH_COMPONENT16); _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base); } diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 648bf61240..9a73b13951 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -33,12 +33,6 @@ #include "i915_drm.h" #include "xmlconfig.h" -enum tiling_mode { - INTEL_TILE_NONE, - INTEL_TILE_X, - INTEL_TILE_Y -}; - /* XXX: change name or eliminate to avoid conflict with "struct * intel_region"!!! */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index b1392f794e..3065d15e32 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -39,6 +39,10 @@ #include "swrast/swrast.h" +static void +intel_set_span_functions(struct intel_context *intel, + struct gl_renderbuffer *rb); + /* * Deal with tiled surfaces */ @@ -111,39 +115,26 @@ static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont tile_off = (y_tile_off << 9) + x_tile_off; - /* bit swizzling tricks your parents never told you about: - * - * The specs say that the X tiling layout is just 8 512-byte rows - * packed into a page. It turns out that there's some additional - * swizzling of bit 6 to reduce cache aliasing issues. Experimental - * results below: - * - * line bit GM965 945G/Q965 - * 9 10 11 - * 0 0 0 0 0 0 - * 1 0 1 0 1 1 - * 2 1 0 0 1 1 - * 3 1 1 0 0 0 - * 4 0 0 1 1 0 - * 5 0 1 1 0 1 - * 6 1 0 1 0 1 - * 7 1 1 1 1 0 - * - * So we see that the GM965 is bit 6 ^ 9 ^ 10 ^ 11, while other - * parts were just 6 ^ 9 ^ 10. However, some systems, including a - * GM965 we've seen, don't perform the swizzling at all. Information - * on how to detect it through register reads is expected soon. - */ - switch (intel->tiling_swizzle_mode) { - case 0: + switch (irb->region->bit_6_swizzle) { + case I915_BIT_6_SWIZZLE_NONE: break; - case 1: + case I915_BIT_6_SWIZZLE_9: + tile_off ^= ((tile_off >> 3) & 64); + break; + case I915_BIT_6_SWIZZLE_9_10: tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); break; - case 2: + case I915_BIT_6_SWIZZLE_9_11: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); + break; + case I915_BIT_6_SWIZZLE_9_10_11: tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ ((tile_off >> 5) & 64); break; + default: + fprintf(stderr, "Unknown tile swizzling mode %d\n", + irb->region->bit_6_swizzle); + exit(1); } tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; @@ -184,15 +175,28 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) + (x_tile_off & 0xf); - switch (intel->tiling_swizzle_mode) { - case 0: + switch (irb->region->bit_6_swizzle) { + case I915_BIT_6_SWIZZLE_NONE: + break; + case I915_BIT_6_SWIZZLE_9: + tile_off ^= ((tile_off >> 3) & 64); + break; + case I915_BIT_6_SWIZZLE_9_10: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64); break; - case 1: - tile_off ^= (tile_off >> 3) & 64; + case I915_BIT_6_SWIZZLE_9_11: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64); break; - case 2: + case I915_BIT_6_SWIZZLE_9_10_11: + tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^ + ((tile_off >> 5) & 64); break; + default: + fprintf(stderr, "Unknown tile swizzling mode %d\n", + irb->region->bit_6_swizzle); + exit(1); } + tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; return buf + tile_base + tile_off; @@ -491,16 +495,14 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j]; irb = intel_renderbuffer(rb); - if (irb) { - /* this is a user-created intel_renderbuffer */ - if (irb->region) { - if (map) - intel_region_map(intel, irb->region); - else - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } + if (irb && irb->region) { + intel_set_span_functions(intel, rb); + if (map) + intel_region_map(intel, irb->region); + else + intel_region_unmap(intel, irb->region); + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; } } @@ -526,6 +528,7 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) /* color read buffers */ irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); if (irb && irb->region) { + intel_set_span_functions(intel, ctx->ReadBuffer->_ColorReadBuffer); if (map) intel_region_map(intel, irb->region); else @@ -568,6 +571,8 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); if (irb && irb->region) { if (map) { + intel_set_span_functions(intel, + ctx->DrawBuffer->_DepthBuffer->Wrapped); intel_region_map(intel, irb->region); irb->pfMap = irb->region->map; irb->pfPitch = irb->region->pitch; @@ -585,6 +590,8 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); if (irb && irb->region) { if (map) { + intel_set_span_functions(intel, + ctx->DrawBuffer->_StencilBuffer->Wrapped); intel_region_map(intel, irb->region); irb->pfMap = irb->region->map; irb->pfPitch = irb->region->pitch; @@ -615,15 +622,6 @@ intelSpanRenderStart(GLcontext * ctx) intelFlush(&intel->ctx); LOCK_HARDWARE(intel); -#if 0 - /* Just map the framebuffer and all textures. Bufmgr code will - * take care of waiting on the necessary fences: - */ - intel_region_map(intel, intel->front_region); - intel_region_map(intel, intel->back_region); - intel_region_map(intel, intel->depth_region); -#endif - for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -646,14 +644,6 @@ intelSpanRenderFinish(GLcontext * ctx) _swrast_flush(ctx); - /* Now unmap the framebuffer: - */ -#if 0 - intel_region_unmap(intel, intel->front_region); - intel_region_unmap(intel, intel->back_region); - intel_region_unmap(intel, intel->depth_region); -#endif - for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; @@ -680,20 +670,32 @@ intelInitSpanFuncs(GLcontext * ctx) * Plug in appropriate span read/write functions for the given renderbuffer. * These are used for the software fallbacks. */ -void -intel_set_span_functions(struct gl_renderbuffer *rb, enum tiling_mode tiling) +static void +intel_set_span_functions(struct intel_context *intel, + struct gl_renderbuffer *rb) { + struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; + uint32_t tiling; + + /* If in GEM mode, we need to do the tile address swizzling ourselves, + * instead of the fence registers handling it. + */ + if (intel->ttm) + tiling = irb->region->tiling; + else + tiling = I915_TILING_NONE; + if (rb->_ActualFormat == GL_RGB5) { /* 565 RGB */ switch (tiling) { - case INTEL_TILE_NONE: + case I915_TILING_NONE: default: intelInitPointers_RGB565(rb); break; - case INTEL_TILE_X: + case I915_TILING_X: intel_XTile_InitPointers_RGB565(rb); break; - case INTEL_TILE_Y: + case I915_TILING_Y: intel_YTile_InitPointers_RGB565(rb); break; } @@ -701,28 +703,28 @@ intel_set_span_functions(struct gl_renderbuffer *rb, enum tiling_mode tiling) else if (rb->_ActualFormat == GL_RGBA8) { /* 8888 RGBA */ switch (tiling) { - case INTEL_TILE_NONE: + case I915_TILING_NONE: default: intelInitPointers_ARGB8888(rb); break; - case INTEL_TILE_X: + case I915_TILING_X: intel_XTile_InitPointers_ARGB8888(rb); break; - case INTEL_TILE_Y: + case I915_TILING_Y: intel_YTile_InitPointers_ARGB8888(rb); break; } } else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { switch (tiling) { - case INTEL_TILE_NONE: + case I915_TILING_NONE: default: intelInitDepthPointers_z16(rb); break; - case INTEL_TILE_X: + case I915_TILING_X: intel_XTile_InitDepthPointers_z16(rb); break; - case INTEL_TILE_Y: + case I915_TILING_Y: intel_YTile_InitDepthPointers_z16(rb); break; } @@ -730,28 +732,28 @@ intel_set_span_functions(struct gl_renderbuffer *rb, enum tiling_mode tiling) else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 || /* XXX FBO remove */ rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { switch (tiling) { - case INTEL_TILE_NONE: + case I915_TILING_NONE: default: intelInitDepthPointers_z24_s8(rb); break; - case INTEL_TILE_X: + case I915_TILING_X: intel_XTile_InitDepthPointers_z24_s8(rb); break; - case INTEL_TILE_Y: + case I915_TILING_Y: intel_YTile_InitDepthPointers_z24_s8(rb); break; } } else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { switch (tiling) { - case INTEL_TILE_NONE: + case I915_TILING_NONE: default: intelInitStencilPointers_z24_s8(rb); break; - case INTEL_TILE_X: + case I915_TILING_X: intel_XTile_InitStencilPointers_z24_s8(rb); break; - case INTEL_TILE_Y: + case I915_TILING_Y: intel_YTile_InitStencilPointers_z24_s8(rb); break; } diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index 1b47c2829c..d2d4d6ecd4 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -33,7 +33,4 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); -extern void intel_set_span_functions(struct gl_renderbuffer *rb, - enum tiling_mode tiling); - #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 8a8eec83aa..cf8eb4ed3c 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -148,7 +148,7 @@ do_copy_texsubimage(struct intel_context *intel, intelImage->mt->pitch, intelImage->mt->region->buffer, image_offset, - intelImage->mt->region->tiled, + intelImage->mt->region->tiling, x, y + height, dstx, dsty, width, height, GL_COPY); /* ? */ } -- cgit v1.2.3 From 946abd9b5a55f999ef21f807769f5fb81b10a426 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 14 Jul 2008 13:55:37 -0700 Subject: i915: fix build after previous commit. --- src/mesa/drivers/dri/i915/intel_pixel_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index 72e1f9ed28..d009590a4b 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -263,7 +263,7 @@ do_blit_readpixels(GLcontext * ctx, intelEmitCopyBlit(intel, src->cpp, - src->pitch, src->buffer, 0, src->tiled, + src->pitch, src->buffer, 0, src->tiling, rowLength, dst_buffer, dst_offset, GL_FALSE, rect.x1, rect.y1, -- cgit v1.2.3 From a5f02368d2a9ab1f814eba2c997729c6c655fc5e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 15 Jul 2008 13:14:18 -0700 Subject: intel-gem: Disable spantmp sse/mmx functions when tile swizzling. Those functions rely on being able to treat the GET_PTR returned value as an array indexed by x, but that's not the case for our tiling. Bug #16387 --- src/mesa/drivers/dri/common/spantmp2.h | 16 +++++++++++++--- src/mesa/drivers/dri/intel/intel_span.c | 4 ++++ 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index 53f5f846a0..5e51112a20 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -48,6 +48,10 @@ #define HW_WRITE_CLIPLOOP() HW_CLIPLOOP() #endif +/* Whether GET_PTR(x, y) + cpp != GET_PTR(x+1, y) */ +#ifndef GET_PTR_NONLINEAR +#define GET_PTR_NONLINEAR 0 +#endif #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5) @@ -389,7 +393,8 @@ static void TAG(ReadRGBASpan)( GLcontext *ctx, } -#if defined(USE_MMX_ASM) && \ +#if !GET_PTR_NONLINEAR && \ + defined(USE_MMX_ASM) && \ (((SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \ ((SPANTMP_PIXEL_FMT == GL_RGB) && \ @@ -440,7 +445,8 @@ static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx, #endif -#if defined(USE_SSE_ASM) && \ +#if !GET_PTR_NONLINEAR && \ + defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx, @@ -474,7 +480,8 @@ static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx, } #endif -#if defined(USE_SSE_ASM) && \ +#if !GET_PTR_NONLINEAR && \ + defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) static void TAG2(ReadRGBASpan,_SSE)( GLcontext *ctx, @@ -567,6 +574,7 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) rb->PutMonoValues = TAG(WriteMonoRGBAPixels); rb->GetValues = TAG(ReadRGBAPixels); +#if !GET_PTR_NONLINEAR #if defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) @@ -596,6 +604,7 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) } else #endif +#endif /* GET_PTR_NONLINEAR */ { if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" ); rb->GetRow = TAG(ReadRGBASpan); @@ -611,5 +620,6 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) #undef TAG #undef TAG2 #undef GET_PTR +#undef GET_PTR_NONLINEAR #undef SPANTMP_PIXEL_FMT #undef SPANTMP_PIXEL_TYPE diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 3065d15e32..4f0855df0a 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -273,6 +273,7 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_XTile_##x##_RGB565 #define TAG2(x,y) intel_XTile_##x##_RGB565##y #define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) +#define GET_PTR_NONLINEAR 1 #include "spantmp2.h" #define SPANTMP_PIXEL_FMT GL_RGB @@ -281,6 +282,7 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_YTile_##x##_RGB565 #define TAG2(x,y) intel_YTile_##x##_RGB565##y #define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) +#define GET_PTR_NONLINEAR 1 #include "spantmp2.h" /* 32 bit ARGB888 color tile spanline and pixel functions @@ -292,6 +294,7 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_XTile_##x##_ARGB8888 #define TAG2(x,y) intel_XTile_##x##_ARGB8888##y #define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) +#define GET_PTR_NONLINEAR 1 #include "spantmp2.h" #define SPANTMP_PIXEL_FMT GL_BGRA @@ -300,6 +303,7 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_YTile_##x##_ARGB8888 #define TAG2(x,y) intel_YTile_##x##_ARGB8888##y #define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) +#define GET_PTR_NONLINEAR 1 #include "spantmp2.h" #define LOCAL_DEPTH_VARS \ -- cgit v1.2.3 From 442c195c4afce2509130a718c44a69a5b009979e Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 9 Jul 2008 08:57:02 -0700 Subject: Remove redundant initalization of MaxTextureUnits --- src/mesa/drivers/dri/i965/brw_context.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 33f1bba085..efe850b4d3 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -134,7 +134,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; ctx->Const.MaxTextureRectSize = (1<<11); - ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; /* ctx->Const.MaxNativeVertexProgramTemps = 32; */ -- cgit v1.2.3 From 99fe0c222c2853a612b73aa6fcffb0a532ce5747 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 18 Jul 2008 12:40:04 -0700 Subject: intel-gem: Bump driver date Bump the driver date and insert the string "GEM". When running tests, this make it much easier to know that the right driver is being used. --- src/mesa/drivers/dri/intel/intel_context.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 7e3f370ad0..f573cf4d4f 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -96,11 +96,13 @@ int INTEL_DEBUG = (0); #include "extension_helper.h" -#define DRIVER_DATE "20061102" +#define DRIVER_DATE "20080716" +#define DRIVER_DATE_GEM "GEM " DRIVER_DATE static const GLubyte * intelGetString(GLcontext * ctx, GLenum name) { + const struct intel_context *const intel = intel_context(ctx); const char *chipset; static char buffer[128]; @@ -110,7 +112,7 @@ intelGetString(GLcontext * ctx, GLenum name) break; case GL_RENDERER: - switch (intel_context(ctx)->intelScreen->deviceID) { + switch (intel->intelScreen->deviceID) { case PCI_CHIP_845_G: chipset = "Intel(R) 845G"; break; @@ -181,7 +183,9 @@ intelGetString(GLcontext * ctx, GLenum name) break; } - (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0); + (void) driGetRendererString(buffer, chipset, + (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE, + 0); return (GLubyte *) buffer; default: -- cgit v1.2.3 From bdaa06ad639821368ac8d1af7b7561fd7e83fb13 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 15 Jul 2008 14:26:19 -0700 Subject: intel: move renderbuffer mapping to separate functions. This lets us avoid duplicated code for doing so, including the depthstencil paths that aren't covered by SpanRenderStart/Finish. Those paths were missing the span funcs setup, leading to a null dereference in the fbotexture demo. --- src/mesa/drivers/dri/intel/intel_depthstencil.c | 80 ++++++------------ src/mesa/drivers/dri/intel/intel_span.c | 103 ++++++++++++------------ src/mesa/drivers/dri/intel/intel_span.h | 4 + 3 files changed, 80 insertions(+), 107 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_depthstencil.c b/src/mesa/drivers/dri/intel/intel_depthstencil.c index 90baecd8c2..70ba68e9e3 100644 --- a/src/mesa/drivers/dri/intel/intel_depthstencil.c +++ b/src/mesa/drivers/dri/intel/intel_depthstencil.c @@ -39,7 +39,7 @@ #include "intel_fbo.h" #include "intel_depthstencil.h" #include "intel_regions.h" - +#include "intel_span.h" /** * The GL_EXT_framebuffer_object allows the user to create their own @@ -86,68 +86,33 @@ * */ - - -static void -map_regions(GLcontext * ctx, - struct intel_renderbuffer *depthRb, - struct intel_renderbuffer *stencilRb) -{ - struct intel_context *intel = intel_context(ctx); - if (depthRb && depthRb->region) { - intel_region_map(intel, depthRb->region); - depthRb->pfMap = depthRb->region->map; - depthRb->pfPitch = depthRb->region->pitch; - } - if (stencilRb && stencilRb->region) { - intel_region_map(intel, stencilRb->region); - stencilRb->pfMap = stencilRb->region->map; - stencilRb->pfPitch = stencilRb->region->pitch; - } -} - -static void -unmap_regions(GLcontext * ctx, - struct intel_renderbuffer *depthRb, - struct intel_renderbuffer *stencilRb) -{ - struct intel_context *intel = intel_context(ctx); - if (depthRb && depthRb->region) { - intel_region_unmap(intel, depthRb->region); - depthRb->pfMap = NULL; - depthRb->pfPitch = 0; - } - if (stencilRb && stencilRb->region) { - intel_region_unmap(intel, stencilRb->region); - stencilRb->pfMap = NULL; - stencilRb->pfPitch = 0; - } -} - - - /** * Undo the pairing/interleaving between depth and stencil buffers. * irb should be a depth/stencil or stencil renderbuffer. */ void -intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb) +intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb) { + struct intel_context *intel = intel_context(ctx); + struct gl_renderbuffer *rb = &irb->Base; + if (irb->PairedStencil) { /* irb is a depth/stencil buffer */ struct gl_renderbuffer *stencilRb; struct intel_renderbuffer *stencilIrb; - ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil); stencilIrb = intel_renderbuffer(stencilRb); if (stencilIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(stencilIrb->PairedDepth == irb->Base.Name); - map_regions(ctx, irb, stencilIrb); - _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base); - unmap_regions(ctx, irb, stencilIrb); + ASSERT(stencilIrb->PairedDepth == rb->Name); + intel_renderbuffer_map(intel, rb); + intel_renderbuffer_map(intel, stencilRb); + _mesa_extract_stencil(ctx, rb, stencilRb); + intel_renderbuffer_unmap(intel, stencilRb); + intel_renderbuffer_unmap(intel, rb); stencilIrb->PairedDepth = 0; } irb->PairedStencil = 0; @@ -157,17 +122,19 @@ intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb) struct gl_renderbuffer *depthRb; struct intel_renderbuffer *depthIrb; - ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT || - irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); + ASSERT(rb->_ActualFormat == GL_STENCIL_INDEX8_EXT || + rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT); depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth); depthIrb = intel_renderbuffer(depthRb); if (depthIrb) { /* need to extract stencil values from the depth buffer */ - ASSERT(depthIrb->PairedStencil == irb->Base.Name); - map_regions(ctx, depthIrb, irb); - _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base); - unmap_regions(ctx, depthIrb, irb); + ASSERT(depthIrb->PairedStencil == rb->Name); + intel_renderbuffer_map(intel, rb); + intel_renderbuffer_map(intel, depthRb); + _mesa_extract_stencil(ctx, depthRb, rb); + intel_renderbuffer_unmap(intel, depthRb); + intel_renderbuffer_unmap(intel, rb); depthIrb->PairedStencil = 0; } irb->PairedDepth = 0; @@ -194,6 +161,7 @@ void intel_validate_paired_depth_stencil(GLcontext * ctx, struct gl_framebuffer *fb) { + struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *depthRb, *stencilRb; depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); @@ -230,9 +198,11 @@ intel_validate_paired_depth_stencil(GLcontext * ctx, stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT); /* establish new pairing: interleave stencil into depth buffer */ - map_regions(ctx, depthRb, stencilRb); + intel_renderbuffer_map(intel, &depthRb->Base); + intel_renderbuffer_map(intel, &stencilRb->Base); _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base); - unmap_regions(ctx, depthRb, stencilRb); + intel_renderbuffer_unmap(intel, &stencilRb->Base); + intel_renderbuffer_unmap(intel, &depthRb->Base); depthRb->PairedStencil = stencilRb->Base.Name; stencilRb->PairedDepth = depthRb->Base.Name; } diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 4f0855df0a..c9d413ec43 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -474,7 +474,39 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_YTile_##x##_z24_s8 #include "stenciltmp.h" +void +intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + if (irb == NULL || irb->region == NULL) + return; + + intel_region_map(intel, irb->region); + + irb->pfMap = irb->region->map; + irb->pfPitch = irb->region->pitch; + + intel_set_span_functions(intel, rb); +} + +void +intel_renderbuffer_unmap(struct intel_context *intel, + struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + if (irb == NULL || irb->region == NULL) + return; + intel_region_unmap(intel, irb->region); + + irb->pfMap = NULL; + irb->pfPitch = 0; + + rb->GetRow = NULL; + rb->PutRow = NULL; +} /** * Map or unmap all the renderbuffers which we may need during @@ -493,21 +525,13 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) { GLcontext *ctx = &intel->ctx; GLuint i, j; - struct intel_renderbuffer *irb; /* color draw buffers */ for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j]; - irb = intel_renderbuffer(rb); - if (irb && irb->region) { - intel_set_span_functions(intel, rb); - if (map) - intel_region_map(intel, irb->region); - else - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } + if (map) + intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); + else + intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]); } /* check for render to textures */ @@ -530,16 +554,10 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) } /* color read buffers */ - irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); - if (irb && irb->region) { - intel_set_span_functions(intel, ctx->ReadBuffer->_ColorReadBuffer); - if (map) - intel_region_map(intel, irb->region); - else - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } + if (map) + intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer); + else + intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer); /* Account for front/back color page flipping. * The span routines use the pfMap and pfPitch fields which will @@ -572,40 +590,21 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) /* depth buffer (Note wrapper!) */ if (ctx->DrawBuffer->_DepthBuffer) { - irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped); - if (irb && irb->region) { - if (map) { - intel_set_span_functions(intel, - ctx->DrawBuffer->_DepthBuffer->Wrapped); - intel_region_map(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - else { - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - } + if (map) + intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped); + else + intel_renderbuffer_unmap(intel, + ctx->DrawBuffer->_DepthBuffer->Wrapped); } /* stencil buffer (Note wrapper!) */ if (ctx->DrawBuffer->_StencilBuffer) { - irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped); - if (irb && irb->region) { - if (map) { - intel_set_span_functions(intel, - ctx->DrawBuffer->_StencilBuffer->Wrapped); - intel_region_map(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - else { - intel_region_unmap(intel, irb->region); - irb->pfMap = irb->region->map; - irb->pfPitch = irb->region->pitch; - } - } + if (map) + intel_renderbuffer_map(intel, + ctx->DrawBuffer->_StencilBuffer->Wrapped); + else + intel_renderbuffer_unmap(intel, + ctx->DrawBuffer->_StencilBuffer->Wrapped); } } diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index d2d4d6ecd4..acbeb4abe1 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -32,5 +32,9 @@ extern void intelInitSpanFuncs(GLcontext * ctx); extern void intelSpanRenderFinish(GLcontext * ctx); extern void intelSpanRenderStart(GLcontext * ctx); +void intel_renderbuffer_map(struct intel_context *intel, + struct gl_renderbuffer *rb); +void intel_renderbuffer_unmap(struct intel_context *intel, + struct gl_renderbuffer *rb); #endif -- cgit v1.2.3 From 1c8791c581ba2e3906a98a74e998dd51dd474ddb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 16 Jul 2008 10:34:39 -0700 Subject: intel: Fix CopyTexSubImage's src tiling arg for the blit. Didn't hurt 915, but needed for 965. --- src/mesa/drivers/dri/intel/intel_tex_copy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index cf8eb4ed3c..d0ab464a1c 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -144,7 +144,7 @@ do_copy_texsubimage(struct intel_context *intel, -src->pitch, src->buffer, src->height * src->pitch * src->cpp, - GL_FALSE, + src->tiling, intelImage->mt->pitch, intelImage->mt->region->buffer, image_offset, -- cgit v1.2.3 From f0ca917924a749b1fa287cc9536607ace03c2f89 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 16 Jul 2008 11:13:35 -0700 Subject: intel: improve 2d batchbuffer debug output. --- src/mesa/drivers/dri/intel/intel_decode.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index a1240639f4..9c105013c0 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -183,9 +183,10 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) switch ((data[0] & 0x1fc00000) >> 22) { case 0x50: instr_out(data, hw_offset, 0, - "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n", + "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis"); + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 11) & 1); len = (data[0] & 0x000000ff) + 2; if (len != 6) @@ -210,7 +211,8 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, pitch %d, " "clipping %sabled\n", format, - data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "(%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "(%d,%d)\n", @@ -220,9 +222,12 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; case 0x53: instr_out(data, hw_offset, 0, - "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n", + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " + "src tile %d, dst tile %d)\n", (data[0] & (1 << 20)) ? "en" : "dis", - (data[0] & (1 << 21)) ? "en" : "dis"); + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 15) & 1, + (data[0] >> 11) & 1); len = (data[0] & 0x000000ff) + 2; if (len != 8) @@ -247,16 +252,17 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) instr_out(data, hw_offset, 1, "format %s, dst pitch %d, " "clipping %sabled\n", format, - data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis"); + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); instr_out(data, hw_offset, 2, "dst (%d,%d)\n", data[2] & 0xffff, data[2] >> 16); instr_out(data, hw_offset, 3, "dst (%d,%d)\n", - data[2] & 0xffff, data[2] >> 16); + data[3] & 0xffff, data[3] >> 16); instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]); instr_out(data, hw_offset, 5, "src (%d,%d)\n", data[5] & 0xffff, data[5] >> 16); instr_out(data, hw_offset, 6, "src pitch %d\n", - data[6] & 0xffff); + (short)(data[6] & 0xffff)); instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]); return len; } -- cgit v1.2.3 From d2d5abfaeb46fc7b4d4267a6c9e92420fc9b5334 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 22 Jul 2008 15:20:07 -0700 Subject: intel-gem: Use pread/pwrite for span access. This will avoid clflushing entire buffers for small acesses, such as those commonly used by regression tests. --- src/mesa/drivers/dri/common/spantmp2.h | 64 ++++---- src/mesa/drivers/dri/intel/intel_fbo.c | 1 - src/mesa/drivers/dri/intel/intel_fbo.h | 1 - src/mesa/drivers/dri/intel/intel_span.c | 272 +++++++++++++------------------- 4 files changed, 141 insertions(+), 197 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index 5e51112a20..a1e56eb148 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -48,40 +48,34 @@ #define HW_WRITE_CLIPLOOP() HW_CLIPLOOP() #endif -/* Whether GET_PTR(x, y) + cpp != GET_PTR(x+1, y) */ -#ifndef GET_PTR_NONLINEAR -#define GET_PTR_NONLINEAR 0 -#endif - #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5) /** ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5 **/ +#ifndef GET_VALUE #ifndef GET_PTR #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch) #endif +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + #define INIT_MONO_PIXEL(p, color) \ p = PACK_COLOR_565( color[0], color[1], color[2] ) #define WRITE_RGBA( _x, _y, r, g, b, a ) \ - do { \ - GLshort * _p = (GLshort *) GET_PTR(_x, _y); \ - _p[0] = ((((int)r & 0xf8) << 8) | (((int)g & 0xfc) << 3) | \ - (((int)b & 0xf8) >> 3)); \ - } while(0) + PUT_VALUE(_x, _y, ((((int)r & 0xf8) << 8) | \ + (((int)g & 0xfc) << 3) | \ + (((int)b & 0xf8) >> 3))) \ -#define WRITE_PIXEL( _x, _y, p ) \ - do { \ - GLushort * _p = (GLushort *) GET_PTR(_x, _y); \ - _p[0] = p; \ - } while(0) +#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p) #define READ_RGBA( rgba, _x, _y ) \ do { \ - GLushort p = *(volatile GLshort *) GET_PTR(_x, _y); \ + GLushort p = GET_VALUE(_x, _y); \ rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8; \ rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc; \ rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \ @@ -94,29 +88,30 @@ ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV **/ +#ifndef GET_VALUE #ifndef GET_PTR #define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch) #endif +#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + # define INIT_MONO_PIXEL(p, color) \ p = PACK_COLOR_8888(color[3], color[0], color[1], color[2]) # define WRITE_RGBA(_x, _y, r, g, b, a) \ - do { \ - GLuint * _p = (GLuint *) GET_PTR(_x, _y); \ - _p[0] = ((r << 16) | (g << 8) | (b << 0) | (a << 24)); \ - } while(0) + PUT_VALUE(_x, _y, ((r << 16) | \ + (g << 8) | \ + (b << 0) | \ + (a << 24))) -#define WRITE_PIXEL(_x, _y, p) \ - do { \ - GLuint * _p = (GLuint *) GET_PTR(_x, _y); \ - _p[0] = p; \ - } while(0) +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) # if defined( USE_X86_ASM ) # define READ_RGBA(rgba, _x, _y) \ do { \ - GLuint p = *(volatile GLuint *) GET_PTR(_x, _y); \ + GLuint p = GET_VALUE(_x, _y); \ __asm__ __volatile__( "bswap %0; rorl $8, %0" \ : "=r" (p) : "0" (p) ); \ ((GLuint *)rgba)[0] = p; \ @@ -127,14 +122,14 @@ */ # define READ_RGBA( rgba, _x, _y ) \ do { \ - GLuint p = *(volatile GLuint *) GET_PTR(_x, _y); \ + GLuint p = GET_VALUE(_x, _y); \ GLuint t = p; \ *((uint32_t *) rgba) = (t >> 24) | (p << 8); \ } while (0) # else # define READ_RGBA( rgba, _x, _y ) \ do { \ - GLuint p = *(volatile GLuint *) GET_PTR(_x, _y); \ + GLuint p = GET_VALUE(_x, _y); \ rgba[0] = (p >> 16) & 0xff; \ rgba[1] = (p >> 8) & 0xff; \ rgba[2] = (p >> 0) & 0xff; \ @@ -393,7 +388,7 @@ static void TAG(ReadRGBASpan)( GLcontext *ctx, } -#if !GET_PTR_NONLINEAR && \ +#if defined(GET_PTR) && \ defined(USE_MMX_ASM) && \ (((SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \ @@ -445,7 +440,7 @@ static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx, #endif -#if !GET_PTR_NONLINEAR && \ +#if defined(GET_PTR) && \ defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) @@ -480,7 +475,7 @@ static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx, } #endif -#if !GET_PTR_NONLINEAR && \ +#if defined(GET_PTR) && \ defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) @@ -574,7 +569,7 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) rb->PutMonoValues = TAG(WriteMonoRGBAPixels); rb->GetValues = TAG(ReadRGBAPixels); -#if !GET_PTR_NONLINEAR +#if defined(GET_PTR) #if defined(USE_SSE_ASM) && \ (SPANTMP_PIXEL_FMT == GL_BGRA) && \ (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) @@ -604,7 +599,7 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) } else #endif -#endif /* GET_PTR_NONLINEAR */ +#endif /* GET_PTR */ { if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" ); rb->GetRow = TAG(ReadRGBASpan); @@ -619,7 +614,8 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) #undef READ_RGBA #undef TAG #undef TAG2 +#undef GET_VALUE +#undef PUT_VALUE #undef GET_PTR -#undef GET_PTR_NONLINEAR #undef SPANTMP_PIXEL_FMT #undef SPANTMP_PIXEL_TYPE diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 7663393fba..d539097a66 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -363,7 +363,6 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, intel_region_reference(&rb->region, region); intel_region_release(&old); - rb->pfMap = region->map; rb->pfPitch = region->pitch; } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 5fe0fd8abf..f55d3747f2 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -70,7 +70,6 @@ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; - void *pfMap; /* possibly paged flipped map pointer */ GLuint pfPitch; /* possibly paged flipped pitch */ GLboolean RenderToTexture; /* RTT? */ diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index c9d413ec43..44e2eff680 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -43,57 +43,72 @@ static void intel_set_span_functions(struct intel_context *intel, struct gl_renderbuffer *rb); -/* - * Deal with tiled surfaces - */ +static uint32_t +pread_32(struct intel_renderbuffer *irb, uint32_t offset) +{ + uint32_t val; -#if 0 -/* These are pre-965 tile swizzling functions -- power of two widths */ -static uintptr_t x_tile_swizzle_pow2 (uintptr_t addr, int n) + dri_bo_get_subdata(irb->region->buffer, offset, 4, &val); + + return val; +} + +static uint16_t +pread_16(struct intel_renderbuffer *irb, uint32_t offset) { - uintptr_t a = addr; - uintptr_t base_mask = (((~0) << (n + 4)) | 0xff); - uintptr_t x_mask = ((~0) << 12) & ~base_mask; - - a = ((a & base_mask) | - ((a >> (n-8)) & 0x7) | - ((a << 3) & x_mask)); - _mesa_printf ("x_swizzle %08x (base %x yrow %x tile#x %x xsword %x byte %x) %08x\n", - addr, - addr >> (n + 4), - (addr >> (n + 1)) & 0x7, - (addr >> 9) & ((1 << (n-8)) - 1), - (addr >> 5) & 0xf, - (addr & 0x1f), - a); - return a; + uint16_t val; + + dri_bo_get_subdata(irb->region->buffer, offset, 2, &val); + + return val; } -static uintptr_t y_tile_swizzle_pow2 (uintptr_t addr, int n) +static uint8_t +pread_8(struct intel_renderbuffer *irb, uint32_t offset) { - uintptr_t a = (uintptr_t) addr; - uintptr_t base_mask = (((~0) << (n + 6)) | 0xf); - uintptr_t x_mask = ((~0) << 9) & ~base_mask; - - a = ((a & base_mask) | - ((a >> (n-3)) & 0x1f) | - ((a << 5) & x_mask)); - _mesa_printf ("y_swizzle %08x (base %x yrow %x tile#x %x xoword %x byte %x) %08x\n", - addr, - addr >> (n + 6), - (addr >> (n + 1)) & 0x01f, - (addr >> 7) & ((1 << (n-6)) - 1), - (addr >> 4) & 0x7, - (addr & 0xf), - a); - return a; + uint8_t val; + + dri_bo_get_subdata(irb->region->buffer, offset, 1, &val); + + return val; +} + +static void +pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) +{ + dri_bo_subdata(irb->region->buffer, offset, 4, &val); +} + +static void +pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val) +{ + dri_bo_subdata(irb->region->buffer, offset, 2, &val); +} + +static void +pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) +{ + dri_bo_subdata(irb->region->buffer, offset, 1, &val); +} + +static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb, + struct intel_context *intel, + int x, int y) +{ + x += intel->drawX; + y += intel->drawY; + + return (y * irb->region->pitch + x) * irb->region->cpp; } -#endif -static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel, +/* + * Deal with tiled surfaces + */ + +static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb, + struct intel_context *intel, int x, int y) { - GLubyte *buf = (GLubyte *) irb->pfMap; int tile_stride; int xbyte; int x_tile_off, y_tile_off; @@ -146,13 +161,13 @@ static GLubyte *x_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont irb->pfPitch, tile_stride); #endif - return buf + tile_base + tile_off; + return tile_base + tile_off; } -static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_context *intel, +static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, + struct intel_context *intel, int x, int y) { - GLubyte *buf = (GLubyte *) irb->pfMap; int tile_stride; int xbyte; int x_tile_off, y_tile_off; @@ -199,7 +214,7 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont tile_base = (x_tile_number << 12) + y_tile_number * tile_stride; - return buf + tile_base + tile_off; + return tile_base + tile_off; } /* @@ -214,11 +229,8 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ - GLubyte *buf = (GLubyte *) irb->pfMap \ - + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\ GLuint p; \ - assert(irb->pfMap);\ - (void) p; (void) buf; + (void) p; /* XXX FBO: this is identical to the macro in spantmp2.h except we get * the cliprect info from the context, not the driDrawable. @@ -251,7 +263,8 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel##x##_RGB565 #define TAG2(x,y) intel##x##_RGB565##y -#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2) +#define GET_VALUE(X, Y) pread_16(irb, no_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, no_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -261,7 +274,8 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel##x##_ARGB8888 #define TAG2(x,y) intel##x##_ARGB8888##y -#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4) +#define GET_VALUE(X, Y) pread_32(irb, no_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" /* 16 bit RGB565 color tile spanline and pixel functions @@ -272,8 +286,8 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_XTile_##x##_RGB565 #define TAG2(x,y) intel_XTile_##x##_RGB565##y -#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) -#define GET_PTR_NONLINEAR 1 +#define GET_VALUE(X, Y) pread_16(irb, x_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, x_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" #define SPANTMP_PIXEL_FMT GL_RGB @@ -281,8 +295,8 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_YTile_##x##_RGB565 #define TAG2(x,y) intel_YTile_##x##_RGB565##y -#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) -#define GET_PTR_NONLINEAR 1 +#define GET_VALUE(X, Y) pread_16(irb, y_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_16(irb, y_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" /* 32 bit ARGB888 color tile spanline and pixel functions @@ -293,8 +307,8 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_XTile_##x##_ARGB8888 #define TAG2(x,y) intel_XTile_##x##_ARGB8888##y -#define GET_PTR(X,Y) x_tile_swizzle(irb, intel, X, Y) -#define GET_PTR_NONLINEAR 1 +#define GET_VALUE(X, Y) pread_32(irb, x_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, x_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" #define SPANTMP_PIXEL_FMT GL_BGRA @@ -302,18 +316,15 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont #define TAG(x) intel_YTile_##x##_ARGB8888 #define TAG2(x,y) intel_YTile_##x##_ARGB8888##y -#define GET_PTR(X,Y) y_tile_swizzle(irb, intel, X, Y) -#define GET_PTR_NONLINEAR 1 +#define GET_VALUE(X, Y) pread_32(irb, y_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \ const GLint yScale = irb->RenderToTexture ? 1 : -1; \ - const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; \ - char *buf = (char *) irb->pfMap/*XXX use region->map*/ + \ - (intel->drawY * pitch + intel->drawX) * irb->region->cpp; (void) buf; + const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1; #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS @@ -321,13 +332,10 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont /** ** 16-bit depthbuffer functions. **/ -#define WRITE_DEPTH( _x, _y, d ) \ - ((GLushort *)buf)[(_x) + (_y) * pitch] = d; - -#define READ_DEPTH( d, _x, _y ) \ - d = ((GLushort *)buf)[(_x) + (_y) * pitch]; - - +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_16(irb, no_tile_swizzle(irb, intel, _x, _y), d) +#define READ_DEPTH(d, _x, _y) \ + d = pread_16(irb, no_tile_swizzle(irb, intel, _x, _y)) #define TAG(x) intel##x##_z16 #include "depthtmp.h" @@ -335,26 +343,20 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont /** ** 16-bit x tile depthbuffer functions. **/ -#define WRITE_DEPTH( _x, _y, d ) \ - (*((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) = d) - -#define READ_DEPTH( d, _x, _y ) \ - d = *((GLushort *)x_tile_swizzle (irb, intel, _x, _y)) - - +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_16(irb, x_tile_swizzle(irb, intel, _x, _y), d) +#define READ_DEPTH(d, _x, _y) \ + d = pread_16(irb, x_tile_swizzle(irb, intel, _x, _y)) #define TAG(x) intel_XTile_##x##_z16 #include "depthtmp.h" /** ** 16-bit y tile depthbuffer functions. **/ -#define WRITE_DEPTH( _x, _y, d ) \ - (*((GLushort *)y_tile_swizzle (irb, intel, _x, _y)) = d) - -#define READ_DEPTH( d, _x, _y ) \ - (d = *((GLushort *)y_tile_swizzle (irb, intel, _x, _y))) - - +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_16(irb, y_tile_swizzle(irb, intel, _x, _y), d) +#define READ_DEPTH(d, _x, _y) \ + d = pread_16(irb, y_tile_swizzle(irb, intel, _x, _y)) #define TAG(x) intel_YTile_##x##_z16 #include "depthtmp.h" @@ -366,14 +368,13 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont ** and stencil values. **/ /* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = ((d) >> 8) | ((d) << 24); \ - ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp; \ -} +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_32(irb, no_tile_swizzle(irb, intel, _x, _y), \ + ((d) >> 8) | ((d) << 24)) /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ + GLuint tmp = pread_32(irb, no_tile_swizzle(irb, intel, _x, _y)); \ d = (tmp << 8) | (tmp >> 24); \ } @@ -388,14 +389,13 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont ** and stencil values. **/ /* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = ((d) >> 8) | ((d) << 24); \ - *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)) = tmp; \ -} +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_32(irb, x_tile_swizzle(irb, intel, _x, _y), \ + ((d) >> 8) | ((d) << 24)) \ /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = *((GLuint *)x_tile_swizzle (irb, intel, _x, _y)); \ + GLuint tmp = pread_32(irb, x_tile_swizzle(irb, intel, _x, _y)); \ d = (tmp << 8) | (tmp >> 24); \ } @@ -409,14 +409,13 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont ** and stencil values. **/ /* Change ZZZS -> SZZZ */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = ((d) >> 8) | ((d) << 24); \ - *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)) = tmp; \ -} +#define WRITE_DEPTH(_x, _y, d) \ + pwrite_32(irb, y_tile_swizzle(irb, intel, _x, _y), \ + ((d) >> 8) | ((d) << 24)) /* Change SZZZ -> ZZZS */ #define READ_DEPTH( d, _x, _y ) { \ - GLuint tmp = *((GLuint *)y_tile_swizzle (irb, intel, _x, _y)); \ + GLuint tmp = pread_32(irb, y_tile_swizzle(irb, intel, _x, _y)); \ d = (tmp << 8) | (tmp >> 24); \ } @@ -427,15 +426,11 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont /** ** 8-bit stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch]; \ - tmp &= 0xffffff; \ - tmp |= ((d) << 24); \ - ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp; \ -} +#define WRITE_STENCIL(_x, _y, d) \ + pwrite_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3, d) -#define READ_STENCIL( d, _x, _y ) \ - d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24; +#define READ_STENCIL(d, _x, _y) \ + d = pread_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3); #define TAG(x) intel##x##_z24_s8 #include "stenciltmp.h" @@ -443,16 +438,11 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont /** ** 8-bit x-tile stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint *a = (GLuint *) x_tile_swizzle (irb, intel, _x, _y); \ - GLuint tmp = *a; \ - tmp &= 0xffffff; \ - tmp |= ((d) << 24); \ - *a = tmp; \ -} +#define WRITE_STENCIL(_x, _y, d) \ + pwrite_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3, d) -#define READ_STENCIL( d, _x, _y ) \ - (d = *((GLuint*) x_tile_swizzle (irb, intel, _x, _y)) >> 24) +#define READ_STENCIL(d, _x, _y) \ + d = pread_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3); #define TAG(x) intel_XTile_##x##_z24_s8 #include "stenciltmp.h" @@ -460,16 +450,11 @@ static GLubyte *y_tile_swizzle(struct intel_renderbuffer *irb, struct intel_cont /** ** 8-bit y-tile stencil function (XXX FBO: This is obsolete) **/ -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint *a = (GLuint *) y_tile_swizzle (irb, intel, _x, _y); \ - GLuint tmp = *a; \ - tmp &= 0xffffff; \ - tmp |= ((d) << 24); \ - *a = tmp; \ -} +#define WRITE_STENCIL(_x, _y, d) \ + pwrite_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3, d) -#define READ_STENCIL( d, _x, _y ) \ - (d = *((GLuint*) y_tile_swizzle (irb, intel, _x, _y)) >> 24) +#define READ_STENCIL(d, _x, _y) \ + d = pread_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3) #define TAG(x) intel_YTile_##x##_z24_s8 #include "stenciltmp.h" @@ -482,9 +467,6 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) if (irb == NULL || irb->region == NULL) return; - intel_region_map(intel, irb->region); - - irb->pfMap = irb->region->map; irb->pfPitch = irb->region->pitch; intel_set_span_functions(intel, rb); @@ -499,9 +481,6 @@ intel_renderbuffer_unmap(struct intel_context *intel, if (irb == NULL || irb->region == NULL) return; - intel_region_unmap(intel, irb->region); - - irb->pfMap = NULL; irb->pfPitch = 0; rb->GetRow = NULL; @@ -559,35 +538,6 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map) else intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer); - /* Account for front/back color page flipping. - * The span routines use the pfMap and pfPitch fields which will - * swap the front/back region map/pitch if we're page flipped. - * Do this after mapping, above, so the map field is valid. - */ -#if 0 - if (map && ctx->DrawBuffer->Name == 0) { - struct intel_renderbuffer *irbFront - = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT); - struct intel_renderbuffer *irbBack - = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT); - if (irbBack) { - /* double buffered */ - if (intel->sarea->pf_current_page == 0) { - irbFront->pfMap = irbFront->region->map; - irbFront->pfPitch = irbFront->region->pitch; - irbBack->pfMap = irbBack->region->map; - irbBack->pfPitch = irbBack->region->pitch; - } - else { - irbFront->pfMap = irbBack->region->map; - irbFront->pfPitch = irbBack->region->pitch; - irbBack->pfMap = irbFront->region->map; - irbBack->pfPitch = irbFront->region->pitch; - } - } - } -#endif - /* depth buffer (Note wrapper!) */ if (ctx->DrawBuffer->_DepthBuffer) { if (map) -- cgit v1.2.3 From 2e3714380027252ba17a11f23eae851d3f77ab02 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 23 Jul 2008 09:17:07 -0700 Subject: intel: Add a little span cache to spead up readpixels by cutting syscalls. --- src/mesa/drivers/dri/intel/intel_fbo.c | 3 +++ src/mesa/drivers/dri/intel/intel_fbo.h | 3 +++ src/mesa/drivers/dri/intel/intel_span.c | 48 ++++++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index d539097a66..254f3efae0 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -153,6 +153,9 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) intel_unpair_depth_stencil(ctx, irb); } + if (irb->span_cache != NULL) + _mesa_free(irb->span_cache); + if (intel && irb->region) { intel_region_release(&irb->region); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index f55d3747f2..9d15582d78 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -79,6 +79,9 @@ struct intel_renderbuffer GLuint pf_pending; /**< sequence number of pending flip */ GLuint vbl_pending; /**< vblank sequence number of pending flip */ + + uint8_t *span_cache; + unsigned long span_cache_offset; }; extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 44e2eff680..06f7c9b4b7 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -43,51 +43,74 @@ static void intel_set_span_functions(struct intel_context *intel, struct gl_renderbuffer *rb); +#define SPAN_CACHE_SIZE 4096 + +static void +get_span_cache(struct intel_renderbuffer *irb, uint32_t offset) +{ + if (irb->span_cache == NULL) { + irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE); + irb->span_cache_offset = -1; + } + + if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) { + irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1); + dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset, + SPAN_CACHE_SIZE, irb->span_cache); + } +} + +static void +clear_span_cache(struct intel_renderbuffer *irb) +{ + irb->span_cache_offset = -1; +} + static uint32_t pread_32(struct intel_renderbuffer *irb, uint32_t offset) { - uint32_t val; + get_span_cache(irb, offset); - dri_bo_get_subdata(irb->region->buffer, offset, 4, &val); - - return val; + return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); } static uint16_t pread_16(struct intel_renderbuffer *irb, uint32_t offset) { - uint16_t val; - - dri_bo_get_subdata(irb->region->buffer, offset, 2, &val); + get_span_cache(irb, offset); - return val; + return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); } static uint8_t pread_8(struct intel_renderbuffer *irb, uint32_t offset) { - uint8_t val; + get_span_cache(irb, offset); - dri_bo_get_subdata(irb->region->buffer, offset, 1, &val); - - return val; + return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); } static void pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) { + clear_span_cache(irb); + dri_bo_subdata(irb->region->buffer, offset, 4, &val); } static void pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val) { + clear_span_cache(irb); + dri_bo_subdata(irb->region->buffer, offset, 2, &val); } static void pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val) { + clear_span_cache(irb); + dri_bo_subdata(irb->region->buffer, offset, 1, &val); } @@ -481,6 +504,7 @@ intel_renderbuffer_unmap(struct intel_context *intel, if (irb == NULL || irb->region == NULL) return; + clear_span_cache(irb); irb->pfPitch = 0; rb->GetRow = NULL; -- cgit v1.2.3 From ff60e3fa031504b403766324b1fae3ccacec6650 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 25 Jul 2008 12:17:58 -0700 Subject: intel: If a tex image doesn't fit in the object's tree, make a temporary tree. Previously, we would just store the data as malloced memory hanging off the object, which would get memcpyed in at validate time. This broke an oglconform render-to-texture test, since validate wasn't called but a miptree was expected. --- src/mesa/drivers/dri/intel/intel_tex_image.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 6d57b2b7dd..b8dcd1e061 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -395,10 +395,25 @@ intelTexImage(GLcontext * ctx, intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); - } + } else if (intelImage->base.Border == 0) { + int comp_byte = 0; + + if (intelImage->base.IsCompressed) { + comp_byte = + intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); + } - if (!intelImage->mt) - DBG("XXX: Image did not fit into tree - storing in local memory!\n"); + /* Didn't fit in the object miptree, but it's suitable for inclusion in + * a miptree, so create one just for our level and store it in the image. + * It'll get moved into the object miptree at validate time. + */ + intelImage->mt = intel_miptree_create(intel, target, internalFormat, + level, level, + width, height, depth, + intelImage->base.TexFormat->TexelBytes, + comp_byte); + + } /* PBO fastpaths: */ -- cgit v1.2.3 From e5022c3fdf9888857f22f9a1690035ff3f90d36b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 25 Jul 2008 12:40:16 -0700 Subject: mesa: Return 0 for cube map face of non-cubemap framebuffer attachments. Fixes some oglconform fbo testcases. --- src/mesa/main/fbobject.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8e9948cb45..cecc89ac38 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1493,7 +1493,10 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, return; case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_EXT: if (att->Type == GL_TEXTURE) { - *params = GL_TEXTURE_CUBE_MAP_POSITIVE_X + att->CubeMapFace; + if (att->Texture->Target == GL_TEXTURE_CUBE_MAP) + *params = GL_TEXTURE_CUBE_MAP_POSITIVE_X + att->CubeMapFace; + else + *params = 0; } else { _mesa_error(ctx, GL_INVALID_ENUM, -- cgit v1.2.3 From 902e401a384a8213d1239aae42bc2b7071ad6bd8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sat, 26 Jul 2008 17:12:04 -0700 Subject: intel: Don't return a renderbuffer with alpha when just GL_RGB is requested. Fixes oglconform rbGetterFuncs testcase. The span code for this mode hasn't actually been tested. --- src/mesa/drivers/dri/intel/intel_fbo.c | 8 +++++ src/mesa/drivers/dri/intel/intel_span.c | 64 +++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 254f3efae0..5bd2ebfdcf 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -212,6 +212,14 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, case GL_RGB10: case GL_RGB12: case GL_RGB16: + rb->_ActualFormat = GL_RGB8; + rb->DataType = GL_UNSIGNED_BYTE; + rb->RedBits = 8; + rb->GreenBits = 8; + rb->BlueBits = 8; + rb->AlphaBits = 0; + cpp = 4; + break; case GL_RGBA: case GL_RGBA2: case GL_RGBA4: diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 06f7c9b4b7..079b9e6a9d 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -74,6 +74,15 @@ pread_32(struct intel_renderbuffer *irb, uint32_t offset) return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))); } +static uint32_t +pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset) +{ + get_span_cache(irb, offset); + + return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) | + 0xff000000; +} + static uint16_t pread_16(struct intel_renderbuffer *irb, uint32_t offset) { @@ -98,6 +107,14 @@ pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) dri_bo_subdata(irb->region->buffer, offset, 4, &val); } +static void +pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val) +{ + clear_span_cache(irb); + + dri_bo_subdata(irb->region->buffer, offset, 3, &val); +} + static void pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val) { @@ -301,6 +318,17 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" +/* 32 bit, xRGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel##x##_xRGB8888 +#define TAG2(x,y) intel##x##_xRGB8888##y +#define GET_VALUE(X, Y) pread_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + /* 16 bit RGB565 color tile spanline and pixel functions */ @@ -343,6 +371,27 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V) #include "spantmp2.h" +/* 32 bit xRGB888 color tile spanline and pixel functions + */ + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_XTile_##x##_xRGB8888 +#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y +#define GET_VALUE(X, Y) pread_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) intel_YTile_##x##_xRGB8888 +#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y +#define GET_VALUE(X, Y) pread_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y)) +#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y), V) +#include "spantmp2.h" + #define LOCAL_DEPTH_VARS \ struct intel_context *intel = intel_context(ctx); \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ @@ -677,6 +726,21 @@ intel_set_span_functions(struct intel_context *intel, break; } } + else if (rb->_ActualFormat == GL_RGB8) { + /* 8888 RGBx */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_xRGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_xRGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_xRGB8888(rb); + break; + } + } else if (rb->_ActualFormat == GL_RGBA8) { /* 8888 RGBA */ switch (tiling) { -- cgit v1.2.3 From c9120a0d1928f8ca6114923fd307fbc5c358eb0b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 30 Jul 2008 14:14:44 -0700 Subject: intel-gem: Use new getparam to detect kernel GEM support. --- src/mesa/drivers/dri/intel/intel_context.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 082dfe87e3..8a4d5d2114 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -464,7 +464,14 @@ intel_init_bufmgr(struct intel_context *intel) { intelScreenPrivate *intelScreen = intel->intelScreen; GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL; + int gem_kernel = 0; GLboolean gem_supported; + struct drm_i915_getparam gp; + + gp.param = I915_PARAM_HAS_GEM; + gp.value = &gem_kernel; + + (void) drmCommandWriteRead(intel->driFd, DRM_I915_GETPARAM, &gp, sizeof(gp)); /* If we've got a new enough DDX that's initializing GEM and giving us * object handles for the shared buffers, use that. @@ -473,7 +480,7 @@ intel_init_bufmgr(struct intel_context *intel) if (intel->intelScreen->driScrnPriv->dri2.enabled) gem_supported = GL_TRUE; else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 && - intel->intelScreen->drmMinor >= 11 && + gem_kernel && intel->intelScreen->front.bo_handle != -1) gem_supported = GL_TRUE; else -- cgit v1.2.3 From ded9414024ef7b2fb1d991d872c56c0d85e9ce1f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 31 Jul 2008 14:13:30 -0700 Subject: intel-gem: Always build GEM execbuffer code. --- src/mesa/drivers/dri/intel/intel_ioctl.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/intel/intel_ioctl.c b/src/mesa/drivers/dri/intel/intel_ioctl.c index c0a18fa225..58c81766cd 100644 --- a/src/mesa/drivers/dri/intel/intel_ioctl.c +++ b/src/mesa/drivers/dri/intel/intel_ioctl.c @@ -150,7 +150,6 @@ intel_batch_ioctl(struct intel_context *intel, return 0; } -#ifdef TTM_API int intel_exec_ioctl(struct intel_context *intel, GLuint used, @@ -184,13 +183,3 @@ intel_exec_ioctl(struct intel_context *intel, return 0; } -#else -int -int intel_exec_ioctl(struct intel_context *intel, - GLuint used, - GLboolean ignore_cliprects, GLboolean allow_unlock, - struct drm_i915_gem_execbuffer *execbuf); -{ - return -EINVAL; -} -#endif -- cgit v1.2.3 From 527e1cf172cb0a4d1f2891a351498669be1620cd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Aug 2008 10:53:25 -0700 Subject: 965: cleanups to state emission from aperture checking and state ordering. --- src/mesa/drivers/dri/i965/brw_cc.c | 4 ++-- src/mesa/drivers/dri/i965/brw_curbe.c | 14 +------------- src/mesa/drivers/dri/i965/brw_misc_state.c | 19 ------------------- src/mesa/drivers/dri/i965/brw_urb.c | 12 ------------ 4 files changed, 3 insertions(+), 46 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 49a80d3e4a..bad9c4a11e 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -37,7 +37,7 @@ #include "macros.h" #include "enums.h" -static int upload_cc_vp( struct brw_context *brw ) +static int prepare_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; @@ -57,7 +57,7 @@ const struct brw_tracked_state brw_cc_vp = { .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .prepare = upload_cc_vp + .prepare = prepare_cc_vp }; struct brw_cc_unit_key { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index b603be8fc1..8047941602 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -156,19 +156,7 @@ void brw_upload_constant_buffer_state(struct brw_context *brw) assert(brw->urb.nr_cs_entries); BRW_CACHED_BATCH_STRUCT(brw, &cbs); -} - -#if 0 -const struct brw_tracked_state brw_constant_buffer_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_URB_FENCE, - .cache = 0 - }, - .update = brw_upload_constant_buffer_state -}; -#endif - +} static GLfloat fixed_plane[6][4] = { { 0, 0, -1, 1 }, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 653695cdf5..9634d649dd 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -133,24 +133,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_PSP; } -#if 0 -/* Combined into brw_psp_urb_cbs */ -const struct brw_tracked_state brw_pipelined_state_pointers = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_METAOPS | BRW_NEW_BATCH, - .cache = (CACHE_NEW_VS_UNIT | - CACHE_NEW_GS_UNIT | - CACHE_NEW_GS_PROG | - CACHE_NEW_CLIP_UNIT | - CACHE_NEW_SF_UNIT | - CACHE_NEW_WM_UNIT | - CACHE_NEW_CC_UNIT) - }, - .emit = upload_pipelined_state_pointers -}; -#endif - static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); @@ -158,7 +140,6 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) brw_upload_constant_buffer_state(brw); } - const struct brw_tracked_state brw_psp_urb_cbs = { .dirty = { .mesa = 0, diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index c423dbe7d7..244c82169a 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -187,15 +187,3 @@ void brw_upload_urb_fence(struct brw_context *brw) BRW_BATCH_STRUCT(brw, &uf); } - - -#if 0 -const struct brw_tracked_state brw_urb_fence = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP, - .cache = 0 - }, - .update = brw_upload_urb_fence -}; -#endif -- cgit v1.2.3 From d2796939f18815935c8fe1effb01fa9765d6c7d8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Aug 2008 13:58:48 -0700 Subject: intel-gem: Update to new check_aperture API for classic mode. To do this, I had to clean up some of 965 state upload stuff. We may end up over-emitting state in the aperture overflow case, but that should be rare, and I'd rather have the simplification of state management. --- src/mesa/drivers/dri/i915/i830_vtbl.c | 17 ++-- src/mesa/drivers/dri/i915/i915_vtbl.c | 29 +++--- src/mesa/drivers/dri/i915/intel_tris.c | 10 ++- src/mesa/drivers/dri/i965/brw_cc.c | 6 +- src/mesa/drivers/dri/i965/brw_clip.c | 4 +- src/mesa/drivers/dri/i965/brw_clip_state.c | 6 +- src/mesa/drivers/dri/i965/brw_context.h | 19 +++- src/mesa/drivers/dri/i965/brw_curbe.c | 19 ++-- src/mesa/drivers/dri/i965/brw_draw.c | 72 ++++----------- src/mesa/drivers/dri/i965/brw_draw.h | 23 ----- src/mesa/drivers/dri/i965/brw_draw_upload.c | 109 +++++++++++++---------- src/mesa/drivers/dri/i965/brw_fallback.c | 3 +- src/mesa/drivers/dri/i965/brw_gs.c | 7 +- src/mesa/drivers/dri/i965/brw_gs_state.c | 3 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 44 +++++---- src/mesa/drivers/dri/i965/brw_sf.c | 3 +- src/mesa/drivers/dri/i965/brw_sf_state.c | 16 +--- src/mesa/drivers/dri/i965/brw_state.h | 3 + src/mesa/drivers/dri/i965/brw_state_upload.c | 39 +++++--- src/mesa/drivers/dri/i965/brw_urb.c | 3 +- src/mesa/drivers/dri/i965/brw_vs.c | 3 +- src/mesa/drivers/dri/i965/brw_vs_constval.c | 3 +- src/mesa/drivers/dri/i965/brw_vs_state.c | 3 +- src/mesa/drivers/dri/i965/brw_vs_tnl.c | 10 +-- src/mesa/drivers/dri/i965/brw_wm.c | 4 +- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 10 +-- src/mesa/drivers/dri/i965/brw_wm_state.c | 9 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 38 ++------ src/mesa/drivers/dri/intel/intel_batchbuffer.c | 4 - src/mesa/drivers/dri/intel/intel_blit.c | 30 ++++--- src/mesa/drivers/dri/intel/intel_context.h | 1 + 31 files changed, 245 insertions(+), 305 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 0af5ed0b50..40a50ff772 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -420,10 +420,12 @@ i830_emit_state(struct intel_context *intel) { struct i830_context *i830 = i830_context(&intel->ctx); struct i830_hw_state *state = i830->current; - int i, ret, count; + int i, count; GLuint dirty; GET_CURRENT_CONTEXT(ctx); BATCH_LOCALS; + dri_bo *aper_array[3 + I830_TEX_UNITS]; + int aper_count; /* We don't hold the lock at this point, so want to make sure that * there won't be a buffer wrap between the state emits and the primitive @@ -441,22 +443,23 @@ i830_emit_state(struct intel_context *intel) LOOP_CLIPRECTS); count = 0; again: + aper_count = 0; dirty = get_dirty(state); - ret = 0; + aper_array[aper_count++] = intel->batch->buf; if (dirty & I830_UPLOAD_BUFFERS) { - ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer); - ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer); + aper_array[aper_count++] = state->draw_region->buffer; + aper_array[aper_count++] = state->depth_region->buffer; } - + for (i = 0; i < I830_TEX_UNITS; i++) if (dirty & I830_UPLOAD_TEX(i)) { if (state->tex_buffer[i]) { - ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]); + aper_array[aper_count++] = state->tex_buffer[i]; } } - if (ret) { + if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { if (count == 0) { count++; intel_batchbuffer_flush(intel->batch); diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 27dfc2b890..19f2206285 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -297,9 +297,9 @@ i915_emit_state(struct intel_context *intel) { struct i915_context *i915 = i915_context(&intel->ctx); struct i915_hw_state *state = i915->current; - int i; - int ret, count; + int i, count, aper_count; GLuint dirty; + dri_bo *aper_array[3 + I915_TEX_UNITS]; GET_CURRENT_CONTEXT(ctx); BATCH_LOCALS; @@ -319,24 +319,27 @@ i915_emit_state(struct intel_context *intel) LOOP_CLIPRECTS); count = 0; again: + aper_count = 0; dirty = get_dirty(state); - ret = 0; + aper_array[aper_count++] = intel->batch->buf; if (dirty & I915_UPLOAD_BUFFERS) { - ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer); - if (state->depth_region) - ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer); + aper_array[aper_count++] = state->draw_region->buffer; + if (state->depth_region) + aper_array[aper_count++] = state->depth_region->buffer; } if (dirty & I915_UPLOAD_TEX_ALL) { - for (i = 0; i < I915_TEX_UNITS; i++) - if (dirty & I915_UPLOAD_TEX(i)) { - if (state->tex_buffer[i]) { - ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]); - } - } + for (i = 0; i < I915_TEX_UNITS; i++) { + if (dirty & I915_UPLOAD_TEX(i)) { + if (state->tex_buffer[i]) { + aper_array[aper_count++] = state->tex_buffer[i]; + } + } + } } - if (ret) { + + if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { if (count == 0) { count++; intel_batchbuffer_flush(intel->batch); diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 8714dd15f3..5f4b852212 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -92,8 +92,6 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) INTEL_VB_SIZE, 4); intel->prim.start_offset = 0; intel->prim.current_offset = 0; - - dri_bufmgr_check_aperture_space(intel->prim.vb_bo); } intel->prim.flush = intel_flush_prim; @@ -109,6 +107,7 @@ uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count) void intel_flush_prim(struct intel_context *intel) { BATCH_LOCALS; + dri_bo *aper_array[2]; dri_bo *vb_bo; /* Must be called after an intel_start_prim. */ @@ -127,6 +126,13 @@ void intel_flush_prim(struct intel_context *intel) intel->vtbl.emit_state(intel); + aper_array[0] = intel->batch->buf; + aper_array[1] = vb_bo; + if (dri_bufmgr_check_aperture_space(aper_array, 2)) { + intel_batchbuffer_flush(intel->batch); + intel->vtbl.emit_state(intel); + } + /* Ensure that we don't start a new batch for the following emit, which * depends on the state just emitted. emit_state should be making sure we * have the space for this. diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index bad9c4a11e..d662cf7521 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -37,7 +37,7 @@ #include "macros.h" #include "enums.h" -static int prepare_cc_vp( struct brw_context *brw ) +static void prepare_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; @@ -48,7 +48,6 @@ static int prepare_cc_vp( struct brw_context *brw ) dri_bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); - return dri_bufmgr_check_aperture_space(brw->cc.vp_bo); } const struct brw_tracked_state brw_cc_vp = { @@ -266,7 +265,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) return bo; } -static int prepare_cc_unit( struct brw_context *brw ) +static void prepare_cc_unit( struct brw_context *brw ) { struct brw_cc_unit_key key; @@ -280,7 +279,6 @@ static int prepare_cc_unit( struct brw_context *brw ) if (brw->cc.state_bo == NULL) brw->cc.state_bo = cc_unit_create_from_key(brw, &key); - return dri_bufmgr_check_aperture_space(brw->cc.state_bo); } const struct brw_tracked_state brw_cc_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 540108e5f4..22981fd2d9 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -131,7 +131,7 @@ static void compile_clip_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static int upload_clip_prog( struct brw_context *brw ) +static void upload_clip_prog(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_clip_prog_key key; @@ -242,8 +242,6 @@ static int upload_clip_prog( struct brw_context *brw ) &brw->clip.prog_data); if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); - - return dri_bufmgr_check_aperture_space(brw->clip.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 974cb77cb8..ae904c6253 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -129,10 +129,9 @@ clip_unit_create_from_key(struct brw_context *brw, return bo; } -static int upload_clip_unit( struct brw_context *brw ) +static void upload_clip_unit( struct brw_context *brw ) { struct brw_clip_unit_key key; - int ret = 0; clip_unit_populate_key(brw, &key); @@ -144,9 +143,6 @@ static int upload_clip_unit( struct brw_context *brw ) if (brw->clip.state_bo == NULL) { brw->clip.state_bo = clip_unit_create_from_key(brw, &key); } - - ret = dri_bufmgr_check_aperture_space(brw->clip.state_bo); - return ret; } const struct brw_tracked_state brw_clip_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 32e05542e0..330d5714da 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -135,6 +135,8 @@ struct brw_context; #define BRW_NEW_METAOPS 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_LOCK 0x4000 +#define BRW_NEW_INDICES 0x8000 +#define BRW_NEW_VERTICES 0x10000 /** * Used for any batch entry with a relocated pointer that will be used * by any 3D rendering. @@ -332,7 +334,7 @@ struct brw_state_pointers { */ struct brw_tracked_state { struct brw_state_flags dirty; - int (*prepare)( struct brw_context *brw ); + void (*prepare)( struct brw_context *brw ); void (*emit)( struct brw_context *brw ); }; @@ -450,8 +452,21 @@ struct brw_context * for changes to this state: */ struct brw_vertex_info info; + unsigned int min_index, max_index; } vb; + struct { + /** + * Index buffer for this draw_prims call. + * + * Updates are signaled by BRW_NEW_INDICES. + */ + const struct _mesa_index_buffer *ib; + + dri_bo *bo; + unsigned int offset; + } ib; + struct { /* Will be allocated on demand if needed. */ @@ -641,7 +656,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /*====================================================================== * brw_state.c */ -int brw_validate_state( struct brw_context *brw ); +void brw_validate_state( struct brw_context *brw ); void brw_init_state( struct brw_context *brw ); void brw_destroy_state( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 8047941602..0a3600193b 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -46,7 +46,7 @@ /* Partition the CURBE between the various users of constant values: */ -static int calculate_curbe_offsets( struct brw_context *brw ) +static void calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; @@ -117,7 +117,6 @@ static int calculate_curbe_offsets( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } - return 0; } @@ -171,7 +170,7 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ -static int prepare_constant_buffer(struct brw_context *brw) +static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; @@ -195,8 +194,8 @@ static int prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - - return 0; + + return; } buf = (GLfloat *)malloc(bufsz); @@ -321,9 +320,6 @@ static int prepare_constant_buffer(struct brw_context *brw) * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ - - /* check aperture space for this bo */ - return dri_bufmgr_check_aperture_space(brw->curbe.curbe_bo); } @@ -331,6 +327,13 @@ static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; + dri_bo *aper_array[] = { + brw->intel.batch->buf, + brw->curbe.curbe_bo, + }; + + if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) + intel_batchbuffer_flush(intel->batch); BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index f90c5f7b08..d43b52c2c7 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -83,9 +83,8 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. */ -static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need_flush) +static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) { - int ret; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); @@ -106,9 +105,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; } - ret = brw_validate_state(brw); - if (ret) - *need_flush = GL_TRUE; + brw_validate_state(brw); } return hw_prim[prim]; @@ -131,7 +128,6 @@ static void brw_emit_prim( struct brw_context *brw, { struct brw_3d_primitive prim_packet; - GLboolean need_flush = GL_FALSE; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -140,7 +136,7 @@ static void brw_emit_prim( struct brw_context *brw, prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; prim_packet.header.pad = 0; - prim_packet.header.topology = brw_set_prim(brw, prim->mode, &need_flush); + prim_packet.header.topology = brw_set_prim(brw, prim->mode); prim_packet.header.indexed = prim->indexed; prim_packet.verts_per_instance = trim(prim->mode, prim->count); @@ -149,12 +145,13 @@ static void brw_emit_prim( struct brw_context *brw, prim_packet.start_instance_location = 0; prim_packet.base_vert_location = 0; + /* Can't wrap here, since we rely on the validated state. */ + brw->no_batch_wrap = GL_TRUE; if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } - - assert(need_flush == GL_FALSE); + brw->no_batch_wrap = GL_FALSE; } static void brw_merge_inputs( struct brw_context *brw, @@ -258,10 +255,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; GLuint i; - GLuint ib_offset; - dri_bo *ib_bo; - GLboolean force_flush = GL_FALSE; - int ret; if (ctx->NewState) _mesa_update_state( ctx ); @@ -271,7 +264,13 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); - + + brw->ib.ib = ib; + brw->state.dirty.brw |= BRW_NEW_INDICES; + + brw->vb.min_index = min_index; + brw->vb.max_index = max_index; + brw->state.dirty.brw |= BRW_NEW_VERTICES; /* Have to validate state quite late. Will rebuild tnl_program, * which depends on varying information. * @@ -294,29 +293,18 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). */ - flush: - if (force_flush) - brw->no_batch_wrap = GL_FALSE; - if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4 /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */ - || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE)) + || intel->batch->cliprect_mode != LOOP_CLIPRECTS) intel_batchbuffer_flush(intel->batch); - force_flush = GL_FALSE; - brw->no_batch_wrap = GL_TRUE; - /* Set the first primitive early, ahead of validate_state: */ - brw_set_prim(brw, prim[0].mode, &force_flush); + brw_set_prim(brw, prim[0].mode); /* XXX: Need to separate validate and upload of state. */ - ret = brw_validate_state( brw ); - if (ret) { - force_flush = GL_TRUE; - goto flush; - } + brw_validate_state( brw ); /* Various fallback checks: */ @@ -326,31 +314,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (check_fallbacks( brw, prim, nr_prims )) goto out; - /* need to account for index buffer and vertex buffer */ - if (ib) { - ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset); - if (ret) { - force_flush = GL_TRUE; - goto flush; - } - } - - ret = brw_prepare_vertices( brw, min_index, max_index); - if (ret < 0) - goto out; - - if (ret > 0) { - force_flush = GL_TRUE; - goto flush; - } - - /* Upload index, vertex data: - */ - if (ib) - brw_emit_indices( brw, ib, ib_bo, ib_offset); - - brw_emit_vertices( brw, min_index, max_index); - for (i = 0; i < nr_prims; i++) { brw_emit_prim(brw, &prim[i]); } @@ -359,9 +322,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, } out: - - brw->no_batch_wrap = GL_FALSE; - UNLOCK_HARDWARE(intel); if (!retval) diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index b3547400d4..2a3e0c1c5b 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -51,27 +51,4 @@ void brw_draw_destroy( struct brw_context *brw ); void brw_init_current_values(GLcontext *ctx, struct gl_client_array *arrays); - -/* brw_draw_upload.c - */ -int brw_prepare_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer, - dri_bo **bo_return, - GLuint *offset_return); - -void brw_emit_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer, - dri_bo *bo, - GLuint offset); - -int brw_prepare_vertices( struct brw_context *brw, - GLuint min_index, - GLuint max_index ); - -void brw_emit_vertices( struct brw_context *brw, - GLuint min_index, - GLuint max_index ); - - - #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 0181b06764..9427131440 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -302,9 +302,7 @@ copy_array_to_vbo_array( struct brw_context *brw, dri_bo_unmap(element->bo); } -int brw_prepare_vertices( struct brw_context *brw, - GLuint min_index, - GLuint max_index ) +static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); @@ -312,7 +310,8 @@ int brw_prepare_vertices( struct brw_context *brw, GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; - int ret = 0; + unsigned int min_index = brw->vb.min_index; + unsigned int max_index = brw->vb.max_index; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint nr_enabled = 0; @@ -340,8 +339,10 @@ int brw_prepare_vertices( struct brw_context *brw, * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) - return -1; + if (nr_enabled >= BRW_VEP_MAX) { + intel->Fallback = 1; + return; + } for (i = 0; i < nr_enabled; i++) { struct brw_vertex_element *input = enabled[i]; @@ -359,8 +360,6 @@ int brw_prepare_vertices( struct brw_context *brw, dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; - - ret |= dri_bufmgr_check_aperture_space(input->bo); } else { /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. @@ -369,7 +368,7 @@ int brw_prepare_vertices( struct brw_context *brw, /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) - return -1; + return; interleave = input->glarray->StrideB; ptr = input->glarray->Ptr; @@ -401,7 +400,6 @@ int brw_prepare_vertices( struct brw_context *brw, */ copy_array_to_vbo_array(brw, upload[0], interleave); - ret |= dri_bufmgr_check_aperture_space(upload[0]->bo); for (i = 1; i < nr_uploads; i++) { /* Then, just point upload[i] at upload[0]'s buffer. */ upload[i]->stride = interleave; @@ -415,23 +413,11 @@ int brw_prepare_vertices( struct brw_context *brw, /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); - if (upload[i]->bo) { - ret |= dri_bufmgr_check_aperture_space(upload[i]->bo); - } } } - - - if (ret) - return 1; - - - return 0; } -void brw_emit_vertices( struct brw_context *brw, - GLuint min_index, - GLuint max_index ) +static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); @@ -469,7 +455,7 @@ void brw_emit_vertices( struct brw_context *brw, OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - OUT_BATCH(max_index); + OUT_BATCH(brw->vb.max_index); OUT_BATCH(0); /* Instance data step rate */ /* Unreference the buffer so it can get freed, now that we won't @@ -513,18 +499,31 @@ void brw_emit_vertices( struct brw_context *brw, ADVANCE_BATCH(); } -int brw_prepare_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer, - dri_bo **bo_return, - GLuint *offset_return) +const struct brw_tracked_state brw_vertices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, + .cache = 0, + }, + .prepare = brw_prepare_vertices, + .emit = brw_emit_vertices, +}; + +static void brw_prepare_indices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + GLuint ib_size; dri_bo *bo; - struct gl_buffer_object *bufferobj = index_buffer->obj; - GLuint offset = (GLuint)index_buffer->ptr; - int ret; + struct gl_buffer_object *bufferobj; + GLuint offset; + + if (index_buffer == NULL) + return; + + ib_size = get_size(index_buffer->type) * index_buffer->count; + bufferobj = index_buffer->obj;; /* Turn into a proper VBO: */ @@ -538,6 +537,8 @@ int brw_prepare_indices( struct brw_context *brw, */ dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); } else { + offset = (GLuint)index_buffer->ptr; + /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. */ @@ -560,19 +561,22 @@ int brw_prepare_indices( struct brw_context *brw, } } - *bo_return = bo; - *offset_return = offset; - ret = dri_bufmgr_check_aperture_space(bo); - return ret; + dri_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; } -void brw_emit_indices(struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer, - dri_bo *bo, - GLuint offset) +static void brw_emit_indices(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + GLuint ib_size; + + if (index_buffer == NULL) + return; + + ib_size = get_size(index_buffer->type) * index_buffer->count; + /* Emit the indexbuffer packet: */ { @@ -588,16 +592,23 @@ void brw_emit_indices(struct brw_context *brw, BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); - OUT_RELOC( bo, - I915_GEM_DOMAIN_VERTEX, 0, - offset); - OUT_RELOC( bo, - I915_GEM_DOMAIN_VERTEX, 0, - offset + ib_size); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset + ib_size); OUT_BATCH( 0 ); ADVANCE_BATCH(); - - dri_bo_unreference(bo); } } +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, + .emit = brw_emit_indices, +}; diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 8a8fb50cb9..693f68f32a 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -95,10 +95,9 @@ static GLboolean do_check_fallback(struct brw_context *brw) return GL_FALSE; } -static int check_fallback(struct brw_context *brw) +static void check_fallback(struct brw_context *brw) { brw->intel.Fallback = do_check_fallback(brw); - return 0; } const struct brw_tracked_state brw_check_fallback = { diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 9419315c7a..2daef0093b 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -162,10 +162,9 @@ static void populate_key( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static int prepare_gs_prog( struct brw_context *brw ) +static void prepare_gs_prog(struct brw_context *brw) { struct brw_gs_prog_key key; - int ret = 0; /* Populate the key: */ populate_key(brw, &key); @@ -183,11 +182,7 @@ static int prepare_gs_prog( struct brw_context *brw ) &brw->gs.prog_data); if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); - - ret |= dri_bufmgr_check_aperture_space(brw->gs.prog_bo); } - - return ret; } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index ae6b48a517..ff2e3ab059 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -116,7 +116,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) return bo; } -static int prepare_gs_unit( struct brw_context *brw ) +static void prepare_gs_unit(struct brw_context *brw) { struct brw_gs_unit_key key; @@ -130,7 +130,6 @@ static int prepare_gs_unit( struct brw_context *brw ) if (brw->gs.state_bo == NULL) { brw->gs.state_bo = gs_unit_create_from_key(brw, &key); } - return dri_bufmgr_check_aperture_space(brw->gs.state_bo); } const struct brw_tracked_state brw_gs_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 9634d649dd..487c638ce2 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -81,6 +81,13 @@ const struct brw_tracked_state brw_blend_constant_color = { static void upload_binding_table_pointers(struct brw_context *brw) { struct intel_context *intel = &brw->intel; + dri_bo *aper_array[] = { + intel->batch->buf, + brw->wm.bind_bo, + }; + + if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) + intel_batchbuffer_flush(intel->batch); BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); @@ -135,6 +142,19 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) static void upload_psp_urb_cbs(struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; + dri_bo *aper_array[] = { + intel->batch->buf, + brw->vs.state_bo, + brw->gs.state_bo, + brw->clip.state_bo, + brw->wm.state_bo, + brw->cc.state_bo, + }; + + if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) + intel_batchbuffer_flush(intel->batch); + upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); brw_upload_constant_buffer_state(brw); @@ -155,22 +175,6 @@ const struct brw_tracked_state brw_psp_urb_cbs = { .emit = upload_psp_urb_cbs, }; -/** - * Upload the depthbuffer offset and format. - * - * We have to do this per state validation as we need to emit the relocation - * in the batch buffer. - */ - -static int prepare_depthbuffer(struct brw_context *brw) -{ - struct intel_region *region = brw->state.depth_region; - - if (!region || !region->buffer) - return 0; - return dri_bufmgr_check_aperture_space(region->buffer); -} - static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; @@ -192,6 +196,10 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { unsigned int format; + dri_bo *aper_array[] = { + intel->batch->buf, + region->buffer + }; switch (region->cpp) { case 2: @@ -208,6 +216,9 @@ static void emit_depthbuffer(struct brw_context *brw) return; } + if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array))) + intel_batchbuffer_flush(intel->batch); + BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | @@ -236,7 +247,6 @@ const struct brw_tracked_state brw_depthbuffer = { .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, .cache = 0, }, - .prepare = prepare_depthbuffer, .emit = emit_depthbuffer, }; diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 0b61748321..5bb9e11310 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -125,7 +125,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static int upload_sf_prog( struct brw_context *brw ) +static void upload_sf_prog(struct brw_context *brw) { struct brw_sf_prog_key key; @@ -174,7 +174,6 @@ static int upload_sf_prog( struct brw_context *brw ) &brw->sf.prog_data); if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); - return dri_bufmgr_check_aperture_space(brw->sf.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index cbed301d31..2478872b82 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -37,7 +37,7 @@ #include "macros.h" #include "intel_fbo.h" -static int upload_sf_vp(struct brw_context *brw) +static void upload_sf_vp(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; @@ -98,8 +98,6 @@ static int upload_sf_vp(struct brw_context *brw) dri_bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); - - return dri_bufmgr_check_aperture_space(brw->sf.vp_bo); } const struct brw_tracked_state brw_sf_vp = { @@ -269,11 +267,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, return bo; } -static int upload_sf_unit( struct brw_context *brw ) +static void upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_key key; dri_bo *reloc_bufs[2]; - int ret = 0; sf_unit_populate_key(brw, &key); @@ -288,15 +285,6 @@ static int upload_sf_unit( struct brw_context *brw ) if (brw->sf.state_bo == NULL) { brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); } - - if (reloc_bufs[0]) - ret |= dri_bufmgr_check_aperture_space(reloc_bufs[0]); - - if (reloc_bufs[1]) - ret |= dri_bufmgr_check_aperture_space(reloc_bufs[1]); - - ret |= dri_bufmgr_check_aperture_space(brw->sf.state_bo); - return ret; } const struct brw_tracked_state brw_sf_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index d1fca051ec..3ea6151ae9 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -80,6 +80,9 @@ const struct brw_tracked_state brw_pipe_control; const struct brw_tracked_state brw_clear_surface_cache; const struct brw_tracked_state brw_clear_batch_cache; +const struct brw_tracked_state brw_indices; +const struct brw_tracked_state brw_vertices; + /*********************************************************************** * brw_state_cache.c */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index b8dfcf5b03..d1d319d92e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -101,6 +101,8 @@ const struct brw_tracked_state *atoms[] = &brw_psp_urb_cbs, #endif + &brw_indices, + &brw_vertices, NULL, /* brw_constant_buffer */ }; @@ -172,10 +174,12 @@ static void xor_states( struct brw_state_flags *result, /*********************************************************************** * Emit all state: */ -int brw_validate_state( struct brw_context *brw ) +void brw_validate_state( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; - GLuint i, ret, count; + GLuint i, count, pass = 0; + dri_bo *last_batch_bo = NULL; state->mesa |= brw->intel.NewGLState; brw->intel.NewGLState = 0; @@ -201,7 +205,7 @@ int brw_validate_state( struct brw_context *brw ) if (state->mesa == 0 && state->cache == 0 && state->brw == 0) - return 0; + return; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache_flush(brw); @@ -219,15 +223,23 @@ int brw_validate_state( struct brw_context *brw ) if (check_state(state, &atom->dirty)) { if (atom->prepare) { - ret = atom->prepare(brw); - if (ret) - return ret; + atom->prepare(brw); } } } if (brw->intel.Fallback) - return 0; + return; + + /* We're about to try to set up a coherent state in the batchbuffer for + * the emission of primitives. If we exceed the aperture size in any of the + * emit() calls, we need to go back to square 1 and try setting up again. + */ +got_flushed: + dri_bo_unreference(last_batch_bo); + last_batch_bo = intel->batch->buf; + dri_bo_reference(last_batch_bo); + assert(pass++ <= 2); if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the @@ -250,8 +262,11 @@ int brw_validate_state( struct brw_context *brw ) break; if (check_state(state, &atom->dirty)) { - if (atom->emit) + if (atom->emit) { atom->emit( brw ); + if (intel->batch->buf != last_batch_bo) + goto got_flushed; + } } accumulate_state(&examined, &atom->dirty); @@ -273,13 +288,17 @@ int brw_validate_state( struct brw_context *brw ) break; if (check_state(state, &atom->dirty)) { - if (atom->emit) + if (atom->emit) { atom->emit( brw ); + if (intel->batch->buf != last_batch_bo) + goto got_flushed; + } } } } + dri_bo_unreference(last_batch_bo); + if (!brw->intel.Fallback) memset(state, 0, sizeof(*state)); - return 0; } diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 244c82169a..1116ade0a4 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -74,7 +74,7 @@ static GLboolean check_urb_layout( struct brw_context *brw ) /* Most minimal update, forces re-emit of URB fence packet after GS * unit turned on/off. */ -static int recalculate_urb_fence( struct brw_context *brw ) +static void recalculate_urb_fence( struct brw_context *brw ) { GLuint csize = brw->curbe.total_size; GLuint vsize = brw->vs.prog_data->urb_entry_size; @@ -142,7 +142,6 @@ static int recalculate_urb_fence( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_URB_FENCE; } - return 0; } diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index f89b0e14a1..1db7ceebcf 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -83,7 +83,7 @@ static void do_vs_prog( struct brw_context *brw, } -static int brw_upload_vs_prog( struct brw_context *brw ) +static void brw_upload_vs_prog(struct brw_context *brw) { struct brw_vs_prog_key key; struct brw_vertex_program *vp = @@ -115,7 +115,6 @@ static int brw_upload_vs_prog( struct brw_context *brw ) &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); - return dri_bufmgr_check_aperture_space(brw->vs.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index a0106b8975..734a926e96 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -166,7 +166,7 @@ static GLuint get_input_size(struct brw_context *brw, /* Calculate sizes of vertex program outputs. Size is the largest * component index which might vary from [0,0,0,1] */ -static int calc_wm_input_sizes( struct brw_context *brw ) +static void calc_wm_input_sizes( struct brw_context *brw ) { /* BRW_NEW_VERTEX_PROGRAM */ struct brw_vertex_program *vp = @@ -210,7 +210,6 @@ static int calc_wm_input_sizes( struct brw_context *brw ) memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)); brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS; } - return 0; } const struct brw_tracked_state brw_wm_input_sizes = { diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index e18cd42f4e..909b942610 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -124,7 +124,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) return bo; } -static int prepare_vs_unit( struct brw_context *brw ) +static void prepare_vs_unit(struct brw_context *brw) { struct brw_vs_unit_key key; @@ -138,7 +138,6 @@ static int prepare_vs_unit( struct brw_context *brw ) if (brw->vs.state_bo == NULL) { brw->vs.state_bo = vs_unit_create_from_key(brw, &key); } - return dri_bufmgr_check_aperture_space(brw->vs.state_bo); } const struct brw_tracked_state brw_vs_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index e409620bbf..2caa020e61 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -1581,7 +1581,7 @@ static GLuint hash_key( struct state_key *key ) return hash; } -static int prepare_tnl_program( struct brw_context *brw ) +static void prepare_tnl_program( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct state_key key; @@ -1590,7 +1590,7 @@ static int prepare_tnl_program( struct brw_context *brw ) /* _NEW_PROGRAM */ if (brw->attribs.VertexProgram->_Current) - return 0; + return; /* Grab all the relevent state and put it in a single structure: */ @@ -1623,7 +1623,7 @@ static int prepare_tnl_program( struct brw_context *brw ) if (old != brw->tnl_program) brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM; - return 0; + return; } /* Note: See brw_draw.c - the vertex program must not rely on @@ -1649,7 +1649,7 @@ const struct brw_tracked_state brw_tnl_vertprog = { -static int prepare_active_vertprog( struct brw_context *brw ) +static void prepare_active_vertprog( struct brw_context *brw ) { const struct gl_vertex_program *prev = brw->vertex_program; @@ -1664,8 +1664,6 @@ static int prepare_active_vertprog( struct brw_context *brw ) if (brw->vertex_program != prev) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - - return 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index acbaf178d4..93ae8dc693 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -325,7 +325,7 @@ static void brw_wm_populate_key( struct brw_context *brw, } -static int brw_prepare_wm_prog( struct brw_context *brw ) +static void brw_prepare_wm_prog(struct brw_context *brw) { struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) @@ -342,8 +342,6 @@ static int brw_prepare_wm_prog( struct brw_context *brw ) &brw->wm.prog_data); if (brw->wm.prog_bo == NULL) do_wm_prog(brw, fp, &key); - - return dri_bufmgr_check_aperture_space(brw->wm.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 461f977aac..08d01823de 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -255,11 +255,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw, * complicates various things. However, this is still too confusing - * FIXME: simplify all the different new texture state flags. */ -static int upload_wm_samplers( struct brw_context *brw ) +static void upload_wm_samplers( struct brw_context *brw ) { struct wm_sampler_key key; int i; - int ret = 0; brw_wm_sampler_populate_key(brw, &key); @@ -271,7 +270,7 @@ static int upload_wm_samplers( struct brw_context *brw ) dri_bo_unreference(brw->wm.sampler_bo); brw->wm.sampler_bo = NULL; if (brw->wm.sampler_count == 0) - return 0; + return; brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), @@ -304,7 +303,6 @@ static int upload_wm_samplers( struct brw_context *brw ) if (!brw->attribs.Texture->Unit[i]._ReallyEnabled) continue; - ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]); intel_bo_emit_reloc(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0, @@ -313,10 +311,6 @@ static int upload_wm_samplers( struct brw_context *brw ) brw->wm.sdc_bo[i]); } } - - ret |= dri_bufmgr_check_aperture_space(brw->wm.sampler_bo); - return ret; - } const struct brw_tracked_state brw_wm_samplers = { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 6fe30f0a9a..f97d0dc285 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -227,12 +227,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, } -static int upload_wm_unit( struct brw_context *brw ) +static void upload_wm_unit( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; struct brw_wm_unit_key key; dri_bo *reloc_bufs[3]; - int ret = 0, i; wm_unit_populate_key(brw, &key); /* Allocate the necessary scratch space if we haven't already. Don't @@ -267,12 +266,6 @@ static int upload_wm_unit( struct brw_context *brw ) if (brw->wm.state_bo == NULL) { brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); } - - for (i = 0; i < 3; i++) - if (reloc_bufs[i]) - ret |= dri_bufmgr_check_aperture_space(reloc_bufs[i]); - ret |= dri_bufmgr_check_aperture_space(brw->wm.state_bo); - return ret; } const struct brw_tracked_state brw_wm_unit = { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 761a5df33f..0f5ba46a19 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -229,7 +229,7 @@ brw_create_texture_surface( struct brw_context *brw, return bo; } -static int +static void brw_update_texture_surface( GLcontext *ctx, GLuint unit ) { struct brw_context *brw = brw_context(ctx); @@ -237,7 +237,6 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; struct brw_wm_surface_key key; - int ret = 0; memset(&key, 0, sizeof(key)); key.target = tObj->Target; @@ -253,8 +252,6 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.depth = firstImage->Depth; key.tiling = intelObj->mt->region->tiling; - ret |= dri_bufmgr_check_aperture_space(key.bo); - dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, &key, sizeof(key), @@ -263,9 +260,6 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); } - - ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); - return ret; } /** @@ -273,12 +267,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. */ -static int +static void brw_update_region_surface(struct brw_context *brw, struct intel_region *region, unsigned int unit, GLboolean cached) { dri_bo *region_bo = NULL; - int ret = 0; struct { unsigned int surface_type; unsigned int surface_format; @@ -302,8 +295,6 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, key.width = region->pitch; /* XXX: not really! */ key.height = region->height; key.cpp = region->cpp; - - ret |= dri_bufmgr_check_aperture_space(region->buffer); } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; @@ -367,10 +358,6 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, region_bo); } } - - ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit]); - - return ret; } @@ -422,23 +409,19 @@ brw_wm_get_binding_table(struct brw_context *brw) return bind_bo; } -static int prepare_wm_surfaces(struct brw_context *brw ) +static void prepare_wm_surfaces(struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - GLuint i, ret; + GLuint i; if (brw->state.nr_draw_regions > 1) { for (i = 0; i < brw->state.nr_draw_regions; i++) { - ret = brw_update_region_surface(brw, brw->state.draw_regions[i], i, - GL_FALSE); - if (ret) - return ret; + brw_update_region_surface(brw, brw->state.draw_regions[i], i, + GL_FALSE); } }else { - ret = brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); - if (ret) - return ret; + brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); } brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; @@ -454,11 +437,8 @@ static int prepare_wm_surfaces(struct brw_context *brw ) dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; } else { - ret = brw_update_texture_surface(ctx, i); + brw_update_texture_surface(ctx, i); brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; - - if (ret) - return ret; } } else { dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); @@ -469,8 +449,6 @@ static int prepare_wm_surfaces(struct brw_context *brw ) dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); - - return dri_bufmgr_check_aperture_space(brw->wm.bind_bo); } diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 9ad9f6a6c0..5afaad070c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -94,10 +94,6 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) batch->ptr = batch->map; batch->dirty_state = ~0; batch->cliprect_mode = IGNORE_CLIPRECTS; - - /* account batchbuffer in aperture */ - dri_bufmgr_check_aperture_space(batch->buf); - } struct intel_batchbuffer * diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index e9076f9ac9..7129a4ba91 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -54,7 +54,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, struct intel_context *intel; const intelScreenPrivate *intelScreen; - int ret; DBG("%s\n", __FUNCTION__); @@ -81,6 +80,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, unsigned short src_x, src_y; int BR13, CMD; int i; + dri_bo *aper_array[3]; src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT); dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT); @@ -116,16 +116,18 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, } #endif /* do space/cliprects check before going any further */ - intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS); + intel_batchbuffer_require_space(intel->batch, 8 * 4, + REFERENCES_CLIPRECTS); again: - ret = dri_bufmgr_check_aperture_space(dst->buffer); - ret |= dri_bufmgr_check_aperture_space(src->buffer); - - if (ret) { + aper_array[0] = intel->batch->buf; + aper_array[1] = dst->buffer; + aper_array[2] = src->buffer; + + if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { intel_batchbuffer_flush(intel->batch); goto again; } - + for (i = 0; i < nbox; i++, pbox++) { drm_clip_rect_t box = *pbox; @@ -273,17 +275,19 @@ intelEmitCopyBlit(struct intel_context *intel, GLuint CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - int ret; + dri_bo *aper_array[3]; BATCH_LOCALS; /* do space/cliprects check before going any further */ intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); again: - ret = dri_bufmgr_check_aperture_space(dst_buffer); - ret |= dri_bufmgr_check_aperture_space(src_buffer); - if (ret) { - intel_batchbuffer_flush(intel->batch); - goto again; + aper_array[0] = intel->batch->buf; + aper_array[1] = dst_buffer; + aper_array[2] = src_buffer; + + if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { + intel_batchbuffer_flush(intel->batch); + goto again; } DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index f1116d2747..f9a373cf74 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -294,6 +294,7 @@ extern char *__progname; #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) #define INTEL_FIREVERTICES(intel) \ -- cgit v1.2.3