VOOZH about

URL: https://lists.freedesktop.org/archives/mesa-dev/2014-November/070395.html

⇱ [Mesa-dev] [PATCH] r600g: Implement GL_ARB_draw_indirect


[Mesa-dev] [PATCH] r600g: Implement GL_ARB_draw_indirect

Glenn Kennard glenn.kennard at gmail.com
Sat Nov 8 14:52:46 PST 2014
Requires Evergreen/Cayman hardware and an updated radeon kernel module (DRM minor version 41 or later).

Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
---
See also the kernel-side patch sent to dri-devel at lists.freedesktop.org.

 docs/GL3.txt | 4 +-
 docs/relnotes/10.4.html | 1 +
 src/gallium/drivers/r600/evergreend.h | 7 ++-
 src/gallium/drivers/r600/r600_pipe.c | 6 ++-
 src/gallium/drivers/r600/r600_state_common.c | 80 ++++++++++++++++++++++------
 5 files changed, 77 insertions(+), 21 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 2854431..06c52f9 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -95,7 +95,7 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
 GL 4.0, GLSL 4.00:
 
 GL_ARB_draw_buffers_blend DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_draw_indirect DONE (i965, nvc0, radeonsi, llvmpipe, softpipe)
+ GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
 GL_ARB_gpu_shader5 DONE (i965, nvc0)
 - 'precise' qualifier DONE
 - Dynamically uniform sampler array indices DONE (r600)
@@ -159,7 +159,7 @@ GL 4.3, GLSL 4.30:
 GL_ARB_framebuffer_no_attachments not started
 GL_ARB_internalformat_query2 not started
 GL_ARB_invalidate_subdata DONE (all drivers)
- GL_ARB_multi_draw_indirect DONE (i965, nvc0, radeonsi, llvmpipe, softpipe)
+ GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
 GL_ARB_program_interface_query not started
 GL_ARB_robust_buffer_access_behavior not started
 GL_ARB_shader_image_size not started
diff --git a/docs/relnotes/10.4.html b/docs/relnotes/10.4.html
index d0fbd3b..9c2a491 100644
--- a/docs/relnotes/10.4.html
+++ b/docs/relnotes/10.4.html
@@ -49,6 +49,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_texture_view on nv50, nvc0</li>
 <li>GL_ARB_clip_control on llvmpipe, softpipe, r300, r600, radeonsi</li>
 <li>GL_KHR_context_flush_control on all drivers</li>
+<li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
 </ul>
 
 
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 4989996..b8880c8 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -64,6 +64,8 @@
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
 
 #define PKT3_NOP 0x10
+#define PKT3_SET_BASE 0x11
+#define PKT3_INDEX_BUFFER_SIZE 0x13
 #define PKT3_DEALLOC_STATE 0x14
 #define PKT3_DISPATCH_DIRECT 0x15
 #define PKT3_DISPATCH_INDIRECT 0x16
@@ -72,12 +74,15 @@
 #define PKT3_REG_RMW 0x21
 #define PKT3_COND_EXEC 0x22
 #define PKT3_PRED_EXEC 0x23
-#define PKT3_START_3D_CMDBUF 0x24
+#define PKT3_DRAW_INDIRECT 0x24
+#define PKT3_DRAW_INDEX_INDIRECT 0x25
+#define PKT3_INDEX_BASE 0x26
 #define PKT3_DRAW_INDEX_2 0x27
 #define PKT3_CONTEXT_CONTROL 0x28
 #define PKT3_DRAW_INDEX_IMMD_BE 0x29
 #define PKT3_INDEX_TYPE 0x2A
 #define PKT3_DRAW_INDEX 0x2B
+#define PKT3_DRAW_INDIRECT_MULTI 0x2C
 #define PKT3_DRAW_INDEX_AUTO 0x2D
 #define PKT3_DRAW_INDEX_IMMD 0x2E
 #define PKT3_NUM_INSTANCES 0x2F
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 0b571e4..829deaf 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -313,6 +313,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return family >= CHIP_CEDAR ? 1 : 0;
 	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
 		return family >= CHIP_CEDAR ? 4 : 0;
+	case PIPE_CAP_DRAW_INDIRECT:
+		/* needs kernel command checking support to work */
+		if (family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41)
+			return 1;
+		return 0;
 
 	/* Unsupported features. */
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -322,7 +327,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-	case PIPE_CAP_DRAW_INDIRECT:
 	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 		return 0;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c3f21cb..649bf24 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1362,7 +1362,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	unsigned i;
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 
-	if (!info.count && (info.indexed || !info.count_from_stream_output)) {
+	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
 		return;
 	}
 
@@ -1391,7 +1391,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		ib.offset = rctx->index_buffer.offset + info.start * ib.index_size;
 
 		/* Translate 8-bit indices to 16-bit. */
-		if (ib.index_size == 1) {
+		if (unlikely(ib.index_size == 1)) {
 			struct pipe_resource *out_buffer = NULL;
 			unsigned out_offset;
 			void *ptr;
@@ -1414,7 +1414,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		 * and the indices are emitted via PKT3_DRAW_INDEX_IMMD.
 		 * Note: Instanced rendering in combination with immediate indices hangs. */
 		if (ib.user_buffer && (R600_BIG_ENDIAN || info.instance_count > 1 ||
-				 info.count*ib.index_size > 20)) {
+				 info.count*ib.index_size > 20 ||
+				 info.indirect)) {
 			u_upload_data(rctx->b.uploader, 0, info.count * ib.index_size,
 				 ib.user_buffer, &ib.offset, &ib.buffer);
 			ib.user_buffer = NULL;
@@ -1521,6 +1522,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	/* Draw packets. */
 	cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing);
 	cs->buf[cs->cdw++] = info.instance_count;
+
+	if (unlikely(info.indirect)) {
+		uint64_t va = r600_resource(info.indirect)->gpu_address;
+		assert(rctx->b.chip_class >= EVERGREEN);
+		cs->buf[cs->cdw++] = PKT3(0x11 /* PKT3_SET_BASE */, 2, rctx->b.predicate_drawing);
+		cs->buf[cs->cdw++] = 1; // 1 means DX11 Draw_Index_Indirect Patch Table Base
+		cs->buf[cs->cdw++] = va;
+		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+
+		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
+		cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+							 (struct r600_resource*)info.indirect,
+							 RADEON_USAGE_READ, RADEON_PRIO_MIN);
+	}
+
 	if (info.indexed) {
 		cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, rctx->b.predicate_drawing);
 		cs->buf[cs->cdw++] = ib.index_size == 4 ?
@@ -1537,18 +1553,40 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			cs->cdw += size_dw;
 		} else {
 			uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset;
-			cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);
-			cs->buf[cs->cdw++] = va;
-			cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
-			cs->buf[cs->cdw++] = info.count;
-			cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
-			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-			cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
-								 (struct r600_resource*)ib.buffer,
-								 RADEON_USAGE_READ, RADEON_PRIO_MIN);
+
+			if (likely(!info.indirect)) {
+				cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = va;
+				cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+				cs->buf[cs->cdw++] = info.count;
+				cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
+				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+									 (struct r600_resource*)ib.buffer,
+									 RADEON_USAGE_READ, RADEON_PRIO_MIN);
+			}
+			else {
+				uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
+
+				cs->buf[cs->cdw++] = PKT3(0x26 /* PKT3_INDEX_BASE */, 1, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = va;
+				cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+
+				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+									 (struct r600_resource*)ib.buffer,
+									 RADEON_USAGE_READ, RADEON_PRIO_MIN);
+
+				cs->buf[cs->cdw++] = PKT3(0x13 /* PKT3_INDEX_BUFFER_SIZE */, 0, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = max_size;
+
+				cs->buf[cs->cdw++] = PKT3(0x25 /* PKT3_DRAW_INDEX_INDIRECT */, 1, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = info.indirect_offset;
+				cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
+			}
 		}
 	} else {
-		if (info.count_from_stream_output) {
+		if (unlikely(info.count_from_stream_output)) {
 			struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
 			uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
 
@@ -1567,10 +1605,18 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 								 RADEON_PRIO_MIN);
 		}
 
-		cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);
-		cs->buf[cs->cdw++] = info.count;
-		cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
-					(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
+		if (likely(!info.indirect)) {
+			cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);
+			cs->buf[cs->cdw++] = info.count;
+			cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
+						(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
+		}
+		else {
+			cs->buf[cs->cdw++] = PKT3(0x24 /* PKT3_DRAW_INDIRECT */, 1, rctx->b.predicate_drawing);
+			cs->buf[cs->cdw++] = info.indirect_offset;
+			cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
+						(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
+		}
 	}
 
 	if (rctx->screen->b.trace_bo) {
-- 
1.9.1



More information about the mesa-dev mailing list