Discussion:
[Mesa-dev] [PATCH 2/9] radv: Add GFX DCC decompress.
Bas Nieuwenhuizen
2017-12-29 02:06:10 UTC
Permalink
---
src/amd/vulkan/radv_meta_fast_clear.c | 94 ++++++++++++++++++++++++++++++-----
src/amd/vulkan/radv_private.h | 1 +
2 files changed, 83 insertions(+), 12 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 1acf510359d..44c2ff52617 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -250,7 +250,55 @@ create_pipeline(struct radv_device *device,
if (result != VK_SUCCESS)
goto cleanup_cmask;

+ result = radv_graphics_pipeline_create(device_h,
+ radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState[]) {
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info) {
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
+ },
+ &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup_fmask;
+
goto cleanup;
+cleanup_fmask:
+ radv_DestroyPipeline(device_h, device->meta_state.fast_clear_flush.fmask_decompress_pipeline, &device->meta_state.alloc);
cleanup_cmask:
radv_DestroyPipeline(device_h, device->meta_state.fast_clear_flush.cmask_eliminate_pipeline, &device->meta_state.alloc);
cleanup:
@@ -263,17 +311,20 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;

- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->fast_clear_flush.pass, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fast_clear_flush.p_layout,
- &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.cmask_eliminate_pipeline,
+ state->fast_clear_flush.dcc_decompress_pipeline,
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->fast_clear_flush.fmask_decompress_pipeline,
&state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.cmask_eliminate_pipeline,
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->fast_clear_flush.pass, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->fast_clear_flush.p_layout,
+ &state->alloc);
}

VkResult
@@ -359,10 +410,11 @@ radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,

/**
*/
-void
-radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange)
+static void
+radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ bool decompress_dcc)
{
struct radv_meta_saved_state saved_state;
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
@@ -376,13 +428,15 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
RADV_META_SAVE_GRAPHICS_PIPELINE |
RADV_META_SAVE_PASS);

- if (image->fmask.size > 0) {
+ if (decompress_dcc && image->surface.dcc_size) {
+ pipeline = cmd_buffer->device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
+ } else if (image->fmask.size > 0) {
pipeline = cmd_buffer->device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
} else {
pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
}

- if (image->surface.dcc_size) {
+ if (!decompress_dcc && image->surface.dcc_size) {
radv_emit_set_predication_state_from_image(cmd_buffer, image, true);
cmd_buffer->state.predicating = true;
}
@@ -454,3 +508,19 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
}
radv_meta_restore(&saved_state, cmd_buffer);
}
+
+void
+radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange)
+{
+ radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
+}
+
+static void
+radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange)
+{
+ radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a6959bfc0ae..4fb3c218eb3 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -490,6 +490,7 @@ struct radv_meta_state {
VkPipelineLayout p_layout;
VkPipeline cmask_eliminate_pipeline;
VkPipeline fmask_decompress_pipeline;
+ VkPipeline dcc_decompress_pipeline;
VkRenderPass pass;
} fast_clear_flush;
--
2.15.1
Bas Nieuwenhuizen
2017-12-29 02:06:11 UTC
Permalink
Simplifies failure paths. The caller already calls
radv_device_finish_meta_fast_clear_flush_state on failure.
---
src/amd/vulkan/radv_meta_fast_clear.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 44c2ff52617..2603229a1f7 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -248,7 +248,7 @@ create_pipeline(struct radv_device *device,
&device->meta_state.alloc,
&device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
if (result != VK_SUCCESS)
- goto cleanup_cmask;
+ goto cleanup;

result = radv_graphics_pipeline_create(device_h,
radv_pipeline_cache_to_handle(&device->meta_state.cache),
@@ -294,13 +294,10 @@ create_pipeline(struct radv_device *device,
&device->meta_state.alloc,
&device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
if (result != VK_SUCCESS)
- goto cleanup_fmask;
+ goto cleanup;

goto cleanup;
-cleanup_fmask:
- radv_DestroyPipeline(device_h, device->meta_state.fast_clear_flush.fmask_decompress_pipeline, &device->meta_state.alloc);
-cleanup_cmask:
- radv_DestroyPipeline(device_h, device->meta_state.fast_clear_flush.cmask_eliminate_pipeline, &device->meta_state.alloc);
+
cleanup:
ralloc_free(fs_module.nir);
return result;
--
2.15.1
Bas Nieuwenhuizen
2017-12-29 02:06:12 UTC
Permalink
We do an in place copy where we read compressed and write decompressed.
By doing this in sizes that cover entire DCC blocks and waiting for all
reads in the block before starting to write we avoid corruption.

In the end we clear the DCC metadata to 0xffffffff.
---
src/amd/vulkan/radv_meta.h | 3 +
src/amd/vulkan/radv_meta_fast_clear.c | 268 ++++++++++++++++++++++++++++++++++
src/amd/vulkan/radv_private.h | 4 +
3 files changed, 275 insertions(+)

diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index 3edf5fa6461..9f3198e8797 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -171,6 +171,9 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
+void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange);

void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 2603229a1f7..98e8f6ac18a 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -28,6 +28,160 @@
#include "radv_private.h"
#include "sid.h"

+
+static nir_shader *
+build_dcc_decompress_compute_shader(struct radv_device *dev)
+{
+ nir_builder b;
+ const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+ false,
+ false,
+ GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+ false,
+ false,
+ GLSL_TYPE_FLOAT);
+ nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+ b.shader->info.name = ralloc_strdup(b.shader, "dcc_decompress_compute");
+
+ /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
+ b.shader->info.cs.local_size[0] = 16;
+ b.shader->info.cs.local_size[1] = 16;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
+ buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
+ img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
+ nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
+ nir_ssa_def *block_size = nir_imm_ivec4(&b,
+ b.shader->info.cs.local_size[0],
+ b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->dest_type = nir_type_float;
+ tex->is_array = false;
+ tex->coord_components = 2;
+ tex->texture = nir_deref_var_create(tex, input_img);
+ tex->sampler = NULL;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_intrinsic_instr *membar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_memory_barrier);
+ nir_builder_instr_insert(&b, &membar->instr);
+
+ nir_intrinsic_instr *bar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_barrier);
+ nir_builder_instr_insert(&b, &bar->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+ nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
+ store->src[0] = nir_src_for_ssa(global_id);
+ store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+ store->src[2] = nir_src_for_ssa(outval);
+ store->variables[0] = nir_deref_var_create(store, output_img);
+
+ nir_builder_instr_insert(&b, &store->instr);
+ return b.shader;
+}
+
+static VkResult
+create_dcc_compress_compute(struct radv_device *device)
+{
+ VkResult result = VK_SUCCESS;
+ struct radv_shader_module cs = { .nir = NULL };
+
+ cs.nir = build_dcc_decompress_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]) {
+ {
+ .binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL
+ },
+ {
+ .binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL
+ },
+ }
+ };
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
+ &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 8},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+ &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = radv_shader_module_to_handle(&cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ 1, &vk_pipeline_info, NULL,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+cleanup:
+ ralloc_free(cs.nir);
+ return result;
+}
+
static VkResult
create_pass(struct radv_device *device)
{
@@ -322,6 +476,16 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->fast_clear_flush.p_layout,
&state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_pipeline,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_ds_layout,
+ &state->alloc);
}

VkResult
@@ -351,6 +515,10 @@ radv_device_init_meta_fast_clear_flush_state(struct radv_device *device)
if (res != VK_SUCCESS)
goto fail;

+ res = create_dcc_compress_compute(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
goto cleanup;

fail:
@@ -521,3 +689,103 @@ radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
{
radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
}
+
+static void
+radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange)
+{
+ struct radv_meta_saved_state saved_state;
+ struct radv_image_view iview = {0};
+ struct radv_device *device = cmd_buffer->device;
+
+ /* This assumes the image is 2d with 1 layer and 1 mipmap level */
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
+ RADV_META_SAVE_COMPUTE_PIPELINE);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk_format,
+ .subresourceRange = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1
+ },
+ });
+
+ radv_meta_push_descriptor_set(cmd_buffer,
+ VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]) {
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]) {
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }
+ }
+ });
+
+ radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1);
+
+ /* The fill buffer below does its own saving */
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_INV_VMEM_L1;
+
+ state->flush_bits |= radv_fill_buffer(cmd_buffer, image->bo,
+ image->offset + image->dcc_offset,
+ image->surface.dcc_size, 0xffffffff);
+
+ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+}
+
+void
+radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange)
+{
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+ radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
+ else
+ radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
+}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4fb3c218eb3..d7e9070fbb8 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -492,6 +492,10 @@ struct radv_meta_state {
VkPipeline fmask_decompress_pipeline;
VkPipeline dcc_decompress_pipeline;
VkRenderPass pass;
+
+ VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
+ VkPipelineLayout dcc_decompress_compute_p_layout;
+ VkPipeline dcc_decompress_compute_pipeline;
} fast_clear_flush;

struct {
--
2.15.1
Bas Nieuwenhuizen
2017-12-29 02:06:14 UTC
Permalink
It should already be valid there + the RB will update it during
rendering.
---
src/amd/vulkan/radv_meta_resolve_fs.c | 5 -----
1 file changed, 5 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c
index 798129ec854..99314d94e53 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -629,13 +629,8 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
continue;

struct radv_image_view *dest_iview = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
- struct radv_image *dst_img = dest_iview->image;
struct radv_image_view *src_iview = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;

- if (dst_img->surface.dcc_size) {
- radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff);
- cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- }
{
VkImageSubresourceRange range;
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
--
2.15.1
Bas Nieuwenhuizen
2017-12-29 02:06:16 UTC
Permalink
If both source and destination are DCC compressed, and their formats
are not compatible, we need to decompress one of them to make
sure we can do reinterpretation (which needs src format == dst format)
.
---
src/amd/vulkan/radv_meta_copy.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index 29951f2ba44..7f7ef22efc8 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -369,8 +369,31 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
dest_image_layout,
&pRegions[r].dstSubresource);

- /* for DCC */
- b_src.format = b_dst.format;
+ uint32_t dst_queue_mask = radv_image_queue_family_mask(dest_image,
+ cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ bool dst_compressed = radv_layout_dcc_compressed(dest_image, dest_image_layout, dst_queue_mask);
+ uint32_t src_queue_mask = radv_image_queue_family_mask(src_image,
+ cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ bool src_compressed = radv_layout_dcc_compressed(src_image, src_image_layout, src_queue_mask);
+
+ if (!src_compressed || radv_dcc_formats_compatible(b_src.format, b_dst.format)) {
+ b_src.format = b_dst.format;
+ } else if (!dst_compressed) {
+ b_dst.format = b_src.format;
+ } else {
+ radv_decompress_dcc(cmd_buffer, dest_image, &(VkImageSubresourceRange) {
+ .aspectMask = pRegions[r].dstSubresource.aspectMask,
+ .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = pRegions[r].dstSubresource.baseArrayLayer,
+ .layerCount = pRegions[r].dstSubresource.layerCount,
+ });
+ b_dst.format = b_src.format;
+ b_dst.current_layout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+

/**
* From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
--
2.15.1
Bas Nieuwenhuizen
2017-12-29 02:06:15 UTC
Permalink
Apps can use this for render feedback loops, where things are
defined if they render each pixel only once. However, DCC fails
here, as the level of coherence is a block not a pixel, so disable it.

This is also going to help implementing other stuff.

Even if we optimize this later to only happen if there actually is
a loop (if possible at all ...), then the machinery is still useful
to exclude images accessible by the SDMA queue when that is implemented.
---
src/amd/vulkan/radv_cmd_buffer.c | 29 +++++++++++++++++++++++------
src/amd/vulkan/radv_image.c | 12 ++++++++++++
src/amd/vulkan/radv_meta_resolve.c | 10 ++++++++--
src/amd/vulkan/radv_private.h | 4 ++++
4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 42468bceed2..c735d201802 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1184,10 +1184,20 @@ radv_emit_depth_biais(struct radv_cmd_buffer *cmd_buffer)
static void
radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
int index,
- struct radv_attachment_info *att)
+ struct radv_attachment_info *att,
+ struct radv_image *image,
+ VkImageLayout layout)
{
bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
struct radv_color_buffer_info *cb = &att->cb;
+ uint32_t cb_color_info = cb->cb_color_info;
+
+ if (!radv_layout_dcc_compressed(image, layout,
+ radv_image_queue_family_mask(image,
+ cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ cb_color_info &= C_028C70_DCC_ENABLE;
+ }

if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
@@ -1195,7 +1205,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cmd_buffer->cs, cb->cb_color_base >> 32);
radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2);
radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb->cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
@@ -1215,7 +1225,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
radeon_emit(cmd_buffer->cs, cb->cb_color_slice);
radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb->cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
@@ -1461,13 +1471,15 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)

int idx = subpass->color_attachments[i].attachment;
struct radv_attachment_info *att = &framebuffer->attachments[idx];
+ struct radv_image *image = att->attachment->image;
+ VkImageLayout layout = subpass->color_attachments[i].layout;

radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo, 8);

assert(att->attachment->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT);
- radv_emit_fb_color_state(cmd_buffer, i, att);
+ radv_emit_fb_color_state(cmd_buffer, i, att, image, layout);

- radv_load_color_clear_regs(cmd_buffer, att->attachment->image, i);
+ radv_load_color_clear_regs(cmd_buffer, image, i);
}

if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
@@ -3878,7 +3890,12 @@ static void radv_handle_dcc_image_transition(struct radv_cmd_buffer *cmd_buffer,
const VkImageSubresourceRange *range)
{
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
- radv_initialize_dcc(cmd_buffer, image, 0x20202020u);
+ radv_initialize_dcc(cmd_buffer, image,
+ radv_layout_dcc_compressed(image, dst_layout, dst_queue_mask) ?
+ 0x20202020u : 0xffffffffu);
+ } else if (radv_layout_dcc_compressed(image, src_layout, src_queue_mask) &&
+ !radv_layout_dcc_compressed(image, dst_layout, dst_queue_mask)) {
+ radv_decompress_dcc(cmd_buffer, image, range);
} else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
!radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index aaf12bdcb16..6088928df80 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -1113,6 +1113,18 @@ bool radv_layout_can_fast_clear(const struct radv_image *image,
queue_mask == (1u << RADV_QUEUE_GENERAL);
}

+bool radv_layout_dcc_compressed(const struct radv_image *image,
+ VkImageLayout layout,
+ unsigned queue_mask)
+{
+ /* Don't compress compute transfer dst, as image stores are not supported. */
+ if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
+ (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
+ return false;
+
+ return image->surface.num_dcc_levels > 0 && layout != VK_IMAGE_LAYOUT_GENERAL;
+}
+

unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c
index 26489b7834f..49326fe9d10 100644
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -315,10 +315,15 @@ enum radv_resolve_method {

static void radv_pick_resolve_method_images(struct radv_image *src_image,
struct radv_image *dest_image,
+ VkImageLayout dest_image_layout,
+ struct radv_cmd_buffer *cmd_buffer,
enum radv_resolve_method *method)

{
- if (dest_image->surface.num_dcc_levels > 0) {
+ uint32_t queue_mask = radv_image_queue_family_mask(dest_image,
+ cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ if (radv_layout_dcc_compressed(dest_image, dest_image_layout, queue_mask)) {
*method = RESOLVE_FRAGMENT;
} else if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
*method = RESOLVE_COMPUTE;
@@ -360,6 +365,7 @@ void radv_CmdResolveImage(
resolve_method = RESOLVE_COMPUTE;

radv_pick_resolve_method_images(src_image, dest_image,
+ dest_image_layout, cmd_buffer,
&resolve_method);

if (resolve_method == RESOLVE_FRAGMENT) {
@@ -577,7 +583,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;

- radv_pick_resolve_method_images(dst_img, src_img, &resolve_method);
+ radv_pick_resolve_method_images(dst_img, src_img, dest_att.layout, cmd_buffer, &resolve_method);
if (resolve_method == RESOLVE_FRAGMENT) {
break;
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index db8ea895e3a..eb5a64d2536 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1355,6 +1355,10 @@ bool radv_layout_can_fast_clear(const struct radv_image *image,
VkImageLayout layout,
unsigned queue_mask);

+bool radv_layout_dcc_compressed(const struct radv_image *image,
+ VkImageLayout layout,
+ unsigned queue_mask);
+
static inline bool
radv_vi_dcc_enabled(const struct radv_image *image, unsigned level)
{
--
2.15.1
Bas Nieuwenhuizen
2017-12-29 02:06:13 UTC
Permalink
For fast clear eliminate and decompressions, we always use the most compressed
format.

For clears, the code already creates a renderpass on demand with the exact same
layout as specified.

Otherwise we start distinguishing between GENERAL and TRANSFER_DST_OPTIMAL.
---
src/amd/vulkan/radv_meta_blit.c | 76 ++++++++++++++++++----------------
src/amd/vulkan/radv_meta_blit2d.c | 77 +++++++++++++++++++----------------
src/amd/vulkan/radv_meta_fast_clear.c | 6 +--
src/amd/vulkan/radv_meta_resolve_fs.c | 74 +++++++++++++++++----------------
src/amd/vulkan/radv_private.h | 22 ++++++++--
5 files changed, 145 insertions(+), 110 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_blit.c b/src/amd/vulkan/radv_meta_blit.c
index 1f5f6ff739d..3ff48498d80 100644
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -325,11 +325,12 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
switch (src_iview->aspect_mask) {
case VK_IMAGE_ASPECT_COLOR_BIT: {
unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);

radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.render_pass[fs_key],
+ .renderPass = device->meta_state.blit.render_pass[fs_key][dst_layout],
.framebuffer = fb,
.renderArea = {
.offset = { dest_box.offset.x, dest_box.offset.y },
@@ -644,9 +645,11 @@ radv_device_finish_meta_blit_state(struct radv_device *device)
struct radv_meta_state *state = &device->meta_state;

for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit.render_pass[i],
- &state->alloc);
+ for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->blit.render_pass[i][j],
+ &state->alloc);
+ }
radv_DestroyPipeline(radv_device_to_handle(device),
state->blit.pipeline_1d_src[i],
&state->alloc);
@@ -717,38 +720,41 @@ radv_device_init_meta_blit_color(struct radv_device *device,

for (unsigned i = 0; i < ARRAY_SIZE(pipeline_formats); ++i) {
unsigned key = radv_format_meta_fs_key(pipeline_formats[i]);
- result = radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription) {
- .format = pipeline_formats[i],
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
+ for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = pipeline_formats[i],
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
},
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
},
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, &device->meta_state.blit.render_pass[key]);
- if (result != VK_SUCCESS)
- goto fail;
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit.render_pass[key][j]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }

VkPipelineVertexInputStateCreateInfo vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -823,7 +829,7 @@ radv_device_init_meta_blit_color(struct radv_device *device,
},
.flags = 0,
.layout = device->meta_state.blit.pipeline_layout,
- .renderPass = device->meta_state.blit.render_pass[key],
+ .renderPass = device->meta_state.blit.render_pass[key][0],
.subpass = 0,
};

diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c
index 78a5536ceff..e163056257e 100644
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -261,11 +261,12 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,

if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);

radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d.render_passes[fs_key],
+ .renderPass = device->meta_state.blit2d.render_passes[fs_key][dst_layout],
.framebuffer = dst_temps.fb,
.renderArea = {
.offset = { rects[r].dst_x, rects[r].dst_y, },
@@ -611,9 +612,11 @@ radv_device_finish_meta_blit2d_state(struct radv_device *device)
struct radv_meta_state *state = &device->meta_state;

for(unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit2d.render_passes[j],
- &state->alloc);
+ for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->blit2d.render_passes[j][k],
+ &state->alloc);
+ }
}

for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT; j++) {
@@ -701,37 +704,41 @@ blit2d_init_color_pipeline(struct radv_device *device,
},
};

- if (!device->meta_state.blit2d.render_passes[fs_key]) {
- result = radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription) {
- .format = format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
+ if (!device->meta_state.blit2d.render_passes[fs_key][dst_layout]) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
+
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
},
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- },
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, &device->meta_state.blit2d.render_passes[fs_key]);
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, &device->meta_state.blit2d.render_passes[fs_key][dst_layout]);
+ }
}

const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
@@ -790,7 +797,7 @@ blit2d_init_color_pipeline(struct radv_device *device,
},
.flags = 0,
.layout = device->meta_state.blit2d.p_layouts[src_type],
- .renderPass = device->meta_state.blit2d.render_passes[fs_key],
+ .renderPass = device->meta_state.blit2d.render_passes[fs_key][0],
.subpass = 0,
};

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index 98e8f6ac18a..fdeeaeedbfb 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -194,8 +194,8 @@ create_pass(struct radv_device *device)
attachment.samples = 1;
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachment.initialLayout = VK_IMAGE_LAYOUT_GENERAL;
- attachment.finalLayout = VK_IMAGE_LAYOUT_GENERAL;
+ attachment.initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+ attachment.finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

result = radv_CreateRenderPass(device_h,
&(VkRenderPassCreateInfo) {
@@ -210,7 +210,7 @@ create_pass(struct radv_device *device)
.pColorAttachments = (VkAttachmentReference[]) {
{
.attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
+ .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
},
},
.pResolveAttachments = NULL,
diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c
index f34e387a07d..798129ec854 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -191,7 +191,7 @@ create_resolve_pipeline(struct radv_device *device,
.nir = build_nir_vertex_shader(),
};

- VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+ VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][0];

assert(!*rp);

@@ -215,36 +215,39 @@ create_resolve_pipeline(struct radv_device *device,
};


- result = radv_CreateRenderPass(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription) {
- .format = format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
+ for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
+ result = radv_CreateRenderPass(radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
},
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference) {
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 1,
- .pPreserveAttachments = (uint32_t[]) { 0 },
- },
- .dependencyCount = 0,
- }, &device->meta_state.alloc, rp);
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = &(VkAttachmentReference) {
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 1,
+ .pPreserveAttachments = (uint32_t[]) { 0 },
+ },
+ .dependencyCount = 0,
+ }, &device->meta_state.alloc, rp + dst_layout);
+ }


const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
@@ -353,9 +356,11 @@ radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve_fragment.rc[i].render_pass[j],
- &state->alloc);
+ for(unsigned k =0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->resolve_fragment.rc[i].render_pass[j][k],
+ &state->alloc);
+ }
radv_DestroyPipeline(radv_device_to_handle(device),
state->resolve_fragment.rc[i].pipeline[j],
&state->alloc);
@@ -452,6 +457,7 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
const uint32_t samples = src_image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
VkRenderPass rp;
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
@@ -467,7 +473,7 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
}

- rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+ rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout];

radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_GRAPHICS_PIPELINE |
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index d7e9070fbb8..db8ea895e3a 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -369,6 +369,22 @@ static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout
return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
}

+enum radv_meta_dst_layout {
+ RADV_META_DST_LAYOUT_GENERAL,
+ RADV_META_DST_LAYOUT_OPTIMAL,
+ RADV_META_DST_LAYOUT_COUNT,
+};
+
+static inline enum radv_meta_dst_layout radv_meta_dst_layout_from_layout(VkImageLayout layout)
+{
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL;
+}
+
+static inline VkImageLayout radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
+{
+ return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
+}
+
struct radv_meta_state {
VkAllocationCallbacks alloc;

@@ -390,7 +406,7 @@ struct radv_meta_state {
VkPipelineLayout clear_color_p_layout;
VkPipelineLayout clear_depth_p_layout;
struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS];
+ VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];

/** Pipeline that blits from a 1D image. */
VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
@@ -415,7 +431,7 @@ struct radv_meta_state {
} blit;

struct {
- VkRenderPass render_passes[NUM_META_FS_KEYS];
+ VkRenderPass render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];

VkPipelineLayout p_layouts[3];
VkDescriptorSetLayout ds_layouts[3];
@@ -474,7 +490,7 @@ struct radv_meta_state {
VkPipelineLayout p_layout;

struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS];
+ VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
VkPipeline pipeline[NUM_META_FS_KEYS];
} rc[MAX_SAMPLES_LOG2];
} resolve_fragment;
--
2.15.1
Bas Nieuwenhuizen
2017-12-29 02:06:17 UTC
Permalink
Before this DCC was in practice disabled for most games. This
enables practical DCC use. Expect a 5-10% perf increase on a
bunch of games on vega @ 4k.
---
src/amd/vulkan/radv_image.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 6088928df80..d8cee6f6cba 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -148,8 +148,7 @@ radv_init_surface(struct radv_device *device,
}
}

- if ((pCreateInfo->usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT)) ||
+ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
(pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
!dcc_compatible_formats ||
(pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
--
2.15.1
Dieter Nützel
2017-12-29 05:05:41 UTC
Permalink
For the series:

Tested-by: Dieter Nützel <***@nuetzel-hh.de>

on RX580 with

'smoketest' somewhat faster
'F1 2017' little bit slower

BTW You dropped all my tb last time.

Dieter
Post by Bas Nieuwenhuizen
Before this DCC was in practice disabled for most games. This
enables practical DCC use. Expect a 5-10% perf increase on a
---
src/amd/vulkan/radv_image.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 6088928df80..d8cee6f6cba 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -148,8 +148,7 @@ radv_init_surface(struct radv_device *device,
}
}
- if ((pCreateInfo->usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT)) ||
+ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
(pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
!dcc_compatible_formats ||
(pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
Loading...