Discussion:
[Mesa-dev] [PATCH 1/2] r600: kill off native_integer shader ctx flag
s***@vmware.com
2017-12-23 07:19:56 UTC
Permalink
From: Roland Scheidegger <***@vmware.com>

Maybe upon a time it wasn't always true.
---
src/gallium/drivers/r600/r600_shader.c | 18 ------------------
1 file changed, 18 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 06d7ca02e9..6cdbfd3063 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -350,7 +350,6 @@ struct r600_shader_ctx {
int cs_grid_size_reg;
bool cs_block_size_loaded, cs_grid_size_loaded;
int fragcoord_input;
- int native_integers;
int next_ring_offset;
int gs_out_ring_offset;
int gs_next_vertex;
@@ -998,22 +997,6 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
break; /* Already handled from allocate_system_value_inputs */
} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
- if (!ctx->native_integers) {
- struct r600_bytecode_alu alu;
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
- alu.op = ALU_OP1_INT_TO_FLT;
- alu.src[0].sel = 0;
- alu.src[0].chan = 3;
-
- alu.dst.sel = 0;
- alu.dst.chan = 3;
- alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
- }
break;
} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
break;
@@ -3128,7 +3111,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,

ctx.bc = &shader->bc;
ctx.shader = shader;
- ctx.native_integers = true;

r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
--
2.12.3
s***@vmware.com
2017-12-23 07:19:57 UTC
Permalink
From: Roland Scheidegger <***@vmware.com>

piglit doesn't care, but I'm quite confident that the size actually bound
as range should be reported and not the base size of the resource.
Also, the array in the constant buffer looks overallocated by a factor of 4.
For eg, also decrease the size by another factor of 2 by using the same
constant slot for both buffer size (required for txq for TBOs) and the number
of layers for cube arrays, as these are mutually exclusive. Could of course use
some more logic and only actually do this for the samplers/images/buffers where
it's required rather than for all, but ah well...
(FWIW I believe the txq for TBOs would be fixable on EG without using a
constant buffer by using the GET_BUFFER_RESINFO vc fetch, but for cube map
arrays we'd still need the buffer as it's unfixable since the hw requires
always 0 unfortunately.)
---
src/gallium/drivers/r600/r600_shader.c | 18 +++++++-------
src/gallium/drivers/r600/r600_state_common.c | 35 +++++++++++++++++-----------
2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 6cdbfd3063..8a63621c2f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6955,9 +6955,9 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offs
alu.op = ALU_OP1_MOV;
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
- /* channel 0 or 2 of each word */
- alu.src[0].sel += (id / 2);
- alu.src[0].chan = (id % 2) * 2;
+ /* with eg each dword is either buf size or number of cubes */
+ alu.src[0].sel += id / 4;
+ alu.src[0].chan = id % 4;
} else {
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1;
@@ -7615,9 +7615,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)

alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
- /* channel 1 or 3 of each word */
- alu.src[0].sel += (id / 2);
- alu.src[0].chan = ((id % 2) * 2) + 1;
+ /* with eg each dword is either buf size or number of cubes */
+ alu.src[0].sel += id / 4;
+ alu.src[0].chan = id % 4;
} else {
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1;
@@ -8782,9 +8782,9 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
alu.op = ALU_OP1_MOV;

alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
- /* channel 1 or 3 of each word */
- alu.src[0].sel += (id / 2);
- alu.src[0].chan = ((id % 2) * 2) + 1;
+ /* with eg each dword is either buf size or number of cubes */
+ alu.src[0].sel += id / 4;
+ alu.src[0].chan = id % 4;
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index e5a5a33367..e9996cb3fa 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -902,7 +902,6 @@ struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_co
unsigned pipe_shader_type)
{
struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector);
- int i;

sel->type = pipe_shader_type;
sel->tokens = tgsi_dup_tokens(tokens);
@@ -1326,7 +1325,7 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
samplers->views.dirty_buffer_constants = FALSE;

bits = util_last_bit(samplers->views.enabled_mask);
- array_size = bits * 8 * sizeof(uint32_t) * 4;
+ array_size = bits * 8 * sizeof(uint32_t);

constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset);

@@ -1349,7 +1348,8 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
} else
constants[offset + 4] = 0;

- constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
+ constants[offset + 5] = samplers->views.views[i]->base.u.buf.size /
+ util_format_get_blocksize(samplers->views.views[i]->base.format);
constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6;
}
}
@@ -1396,25 +1396,33 @@ void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
img_bits = bits;
if (buffers)
bits += util_last_bit(buffers->enabled_mask);
- array_size = bits * 2 * sizeof(uint32_t) * 4;
+ array_size = bits * sizeof(uint32_t);

constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
&base_offset);

for (i = 0; i < sview_bits; i++) {
if (samplers->views.enabled_mask & (1 << i)) {
- uint32_t offset = (base_offset / 4) + i * 2;
- constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format);
- constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6;
+ uint32_t offset = (base_offset / 4) + i;
+ if (samplers->views.views[i]->base.target == PIPE_BUFFER) {
+ constants[offset] = samplers->views.views[i]->base.u.buf.size /
+ util_format_get_blocksize(samplers->views.views[i]->base.format);
+ } else {
+ constants[offset] = samplers->views.views[i]->base.texture->array_size / 6;
+ }
}
}
if (images) {
for (i = sview_bits; i < img_bits; i++) {
int idx = i - sview_bits;
if (images->enabled_mask & (1 << idx)) {
- uint32_t offset = (base_offset / 4) + i * 2;
- constants[offset] = images->views[i].base.resource->width0 / util_format_get_blocksize(images->views[i].base.format);
- constants[offset + 1] = images->views[i].base.resource->array_size / 6;
+ uint32_t offset = (base_offset / 4) + i;
+ if (images->views[i].base.resource->target == PIPE_BUFFER) {
+ constants[offset] = images->views[i].base.u.buf.size /
+ util_format_get_blocksize(images->views[i].base.format);
+ } else {
+ constants[offset] = images->views[i].base.resource->array_size / 6;
+ }
}
}
}
@@ -1422,9 +1430,10 @@ void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
for (i = img_bits; i < bits; i++) {
int idx = i - img_bits;
if (buffers->enabled_mask & (1 << idx)) {
- uint32_t offset = (base_offset / 4) + i * 2;
- constants[offset] = buffers->views[i].base.resource->width0 / util_format_get_blocksize(buffers->views[i].base.format);
- constants[offset + 1] = 0;
+ uint32_t offset = (base_offset / 4) + i;
+ assert(buffers->views[i].base.resource->target == PIPE_BUFFER);
+ constants[offset] = buffers->views[i].base.u.buf.size /
+ util_format_get_blocksize(buffers->views[i].base.format);
}
}
}
--
2.12.3
Dave Airlie
2017-12-28 00:51:02 UTC
Permalink
Post by s***@vmware.com
piglit doesn't care, but I'm quite confident that the size actually bound
as range should be reported and not the base size of the resource.
Also, the array in the constant buffer looks overallocated by a factor of 4.
For eg, also decrease the size by another factor of 2 by using the same
constant slot for both buffer size (required for txq for TBOs) and the number
of layers for cube arrays, as these are mutually exclusive. Could of course use
some more logic and only actually do this for the samplers/images/buffers where
it's required rather than for all, but ah well...
(FWIW I believe the txq for TBOs would be fixable on EG without using a
constant buffer by using the GET_BUFFER_RESINFO vc fetch, but for cube map
arrays we'd still need the buffer as it's unfixable since the hw requires
always 0 unfortunately.)
For the series:

Reviewed-by: Dave Airlie <***@redhat.com>
from previous experiments the hw doesn't work at all using vc fetch or
otherwise.

Dave.
Alex Deucher
2017-12-28 01:16:55 UTC
Permalink
Post by Dave Airlie
Post by s***@vmware.com
piglit doesn't care, but I'm quite confident that the size actually bound
as range should be reported and not the base size of the resource.
Also, the array in the constant buffer looks overallocated by a factor of 4.
For eg, also decrease the size by another factor of 2 by using the same
constant slot for both buffer size (required for txq for TBOs) and the number
of layers for cube arrays, as these are mutually exclusive. Could of course use
some more logic and only actually do this for the samplers/images/buffers where
it's required rather than for all, but ah well...
(FWIW I believe the txq for TBOs would be fixable on EG without using a
constant buffer by using the GET_BUFFER_RESINFO vc fetch, but for cube map
arrays we'd still need the buffer as it's unfixable since the hw requires
always 0 unfortunately.)
from previous experiments the hw doesn't work at all using vc fetch or
otherwise.
vc vs tc fetch just determines which cache is used. vc was slower
than tc and was not present on some asics and was eventually removed
altogether.

Alex
Post by Dave Airlie
Dave.
_______________________________________________
mesa-dev mailing list
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Roland Scheidegger
2017-12-29 19:25:17 UTC
Permalink
Post by Alex Deucher
Post by Dave Airlie
Post by s***@vmware.com
piglit doesn't care, but I'm quite confident that the size actually bound
as range should be reported and not the base size of the resource.
Also, the array in the constant buffer looks overallocated by a factor of 4.
For eg, also decrease the size by another factor of 2 by using the same
constant slot for both buffer size (required for txq for TBOs) and the number
of layers for cube arrays, as these are mutually exclusive. Could of course use
some more logic and only actually do this for the samplers/images/buffers where
it's required rather than for all, but ah well...
(FWIW I believe the txq for TBOs would be fixable on EG without using a
constant buffer by using the GET_BUFFER_RESINFO vc fetch, but for cube map
arrays we'd still need the buffer as it's unfixable since the hw requires
always 0 unfortunately.)
from previous experiments the hw doesn't work at all using vc fetch or
otherwise.
vc vs tc fetch just determines which cache is used. vc was slower
than tc and was not present on some asics and was eventually removed
altogether.
Not according to the docs for this specific instruction:
GET_BUFFER_RESINFO
Returns the number of elements in a buffer. This is a vertex fetch
instruction and uses a vertex constant. It can be used only by a texture
cache, not a vertex cache.

Albeit that restriction is only in the r800 isa docs, not NI (r600
doesn't support it in the first place).

But if it doesn't work either way, I'll just adjust the comment for the
patch...

Roland
Post by Alex Deucher
Alex
Post by Dave Airlie
Dave.
_______________________________________________
mesa-dev mailing list
https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=DwIBaQ&c=uilaK90D4TOVoH58JNXRgQ&r=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0&m=VFFmGFm56BHuS8VYK1t0LK0HZCTASA0LdgVXxHJZWYw&s=AbgSWv099aF3iDuTasfbRiWjEQo5f-kXmT9YFf0Nucg&e=
Loading...