Discussion:
[Mesa-dev] [PATCH 1/6] radeonsi: allow DMABUF exports for local buffers
Marek Olšák
2017-12-05 19:05:53 UTC
Permalink
From: Marek Olšák <***@amd.com>

Cc: 17.3 <mesa-***@lists.freedesktop.org>
---
src/gallium/drivers/radeon/r600_texture.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..07f7c33 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -739,22 +739,26 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
stride = rtex->surface.u.gfx9.surf_pitch *
rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
} else {
offset = rtex->surface.u.legacy.level[0].offset;
stride = rtex->surface.u.legacy.level[0].nblk_x *
rtex->surface.bpe;
slice_size = (uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
}
} else {
+ /* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a non-suballocated allocation. */
- if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+ if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+ /* A DMABUF export always fails if the BO is local. */
+ (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
assert(!res->b.is_shared);

/* Allocate a new buffer with PIPE_BIND_SHARED. */
struct pipe_resource templ = res->b.b;
templ.bind |= PIPE_BIND_SHARED;

struct pipe_resource *newb =
screen->resource_create(screen, &templ);
if (!newb)
return false;
--
2.7.4
Marek Olšák
2017-12-05 19:05:54 UTC
Permalink
From: Marek Olšák <***@amd.com>

---
src/gallium/drivers/radeonsi/si_debug.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 22609b7..385ce39 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -1069,20 +1069,21 @@ void si_check_vm_faults(struct r600_common_context *ctx,
fprintf(f, "Last apitrace call: %u\n\n",
sctx->apitrace_call_number);

switch (ring) {
case RING_GFX: {
struct u_log_context log;
u_log_context_init(&log);

si_log_draw_state(sctx, &log);
si_log_compute_state(sctx, &log);
+ si_log_cs(sctx, &log, true);

u_log_new_page_print(&log, f);
u_log_context_destroy(&log);
break;
}
case RING_DMA:
si_dump_dma(sctx, saved, f);
break;

default:
--
2.7.4
Marek Olšák
2017-12-05 19:05:55 UTC
Permalink
From: Marek Olšák <***@amd.com>

---
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 089a358..63cd632 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -642,20 +642,21 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
break;
default:
unreachable("unhandled IB type");
}

pb = ws->base.buffer_create(&ws->base, buffer_size,
ws->info.gart_page_size,
RADEON_DOMAIN_GTT,
RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_READ_ONLY |
(ring_type == RING_GFX ||
ring_type == RING_COMPUTE ||
ring_type == RING_DMA ?
RADEON_FLAG_GTT_WC : 0));
if (!pb)
return false;

mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
if (!mapped) {
pb_reference(&pb, NULL);
--
2.7.4
Marek Olšák
2017-12-05 19:05:57 UTC
Permalink
From: Marek Olšák <***@amd.com>

---
src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++++++
src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
src/gallium/drivers/radeonsi/si_fence.c | 2 +-
3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index d85f9f0..9090e65 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -447,20 +447,25 @@ bool si_common_context_init(struct r600_common_context *rctx,
rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
0, PIPE_USAGE_STREAM, 0);
if (!rctx->b.stream_uploader)
return false;

rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
0, PIPE_USAGE_DEFAULT, 0);
if (!rctx->b.const_uploader)
return false;

+ rctx->cached_gtt_allocator = u_upload_create(&rctx->b, 16 * 1024,
+ 0, PIPE_USAGE_STAGING, 0);
+ if (!rctx->cached_gtt_allocator)
+ return false;
+
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
return false;

if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
rctx);
rctx->dma.flush = r600_flush_dma_ring;
}
@@ -491,20 +496,22 @@ void si_common_context_cleanup(struct r600_common_context *rctx)
rctx->ws->cs_destroy(rctx->gfx.cs);
if (rctx->dma.cs)
rctx->ws->cs_destroy(rctx->dma.cs);
if (rctx->ctx)
rctx->ws->ctx_destroy(rctx->ctx);

if (rctx->b.stream_uploader)
u_upload_destroy(rctx->b.stream_uploader);
if (rctx->b.const_uploader)
u_upload_destroy(rctx->b.const_uploader);
+ if (rctx->cached_gtt_allocator)
+ u_upload_destroy(rctx->cached_gtt_allocator);

slab_destroy_child(&rctx->pool_transfers);
slab_destroy_child(&rctx->pool_transfers_unsync);

if (rctx->allocator_zeroed_memory) {
u_suballocator_destroy(rctx->allocator_zeroed_memory);
}
rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
r600_resource_reference(&rctx->eop_bug_scratch, NULL);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index d1fdea0..a8e632c 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -388,20 +388,21 @@ struct r600_common_context {
struct si_screen *screen;
struct radeon_winsys *ws;
struct radeon_winsys_ctx *ctx;
enum radeon_family family;
enum chip_class chip_class;
struct r600_ring gfx;
struct r600_ring dma;
struct pipe_fence_handle *last_gfx_fence;
struct pipe_fence_handle *last_sdma_fence;
struct r600_resource *eop_bug_scratch;
+ struct u_upload_mgr *cached_gtt_allocator;
unsigned num_gfx_cs_flushes;
unsigned initial_gfx_cs_size;
unsigned gpu_reset_counter;
unsigned last_dirty_tex_counter;
unsigned last_compressed_colortex_counter;
unsigned last_num_draw_calls;

struct threaded_context *tc;
struct u_suballocator *allocator_zeroed_memory;
struct slab_child_pool pool_transfers;
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index 0d165a1..3c4d754 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -144,21 +144,21 @@ static bool si_fine_fence_signaled(struct radeon_winsys *rws,

static void si_fine_fence_set(struct si_context *ctx,
struct si_fine_fence *fine,
unsigned flags)
{
uint32_t *fence_ptr;

assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);

/* Use uncached system memory for the fence. */
- u_upload_alloc(ctx->b.b.stream_uploader, 0, 4, 4,
+ u_upload_alloc(ctx->b.cached_gtt_allocator, 0, 4, 4,
&fine->offset, (struct pipe_resource **)&fine->buf, (void **)&fence_ptr);
if (!fine->buf)
return;

*fence_ptr = 0;

uint64_t fence_va = fine->buf->gpu_address + fine->offset;

radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, fine->buf,
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
--
2.7.4
Marek Olšák
2017-12-05 19:05:56 UTC
Permalink
From: Marek Olšák <***@amd.com>

---
src/gallium/drivers/radeon/r600_buffer_common.c | 3 +++
src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
src/gallium/drivers/radeonsi/si_pipe.c | 2 ++
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_shader.c | 9 ++++++---
5 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index ec282d5..55400ab 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -167,20 +167,23 @@ void si_init_resource_fields(struct si_screen *sscreen,

/* Displayable and shareable surfaces are not suballocated. */
if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
else
res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;

if (sscreen->debug_flags & DBG(NO_WC))
res->flags &= ~RADEON_FLAG_GTT_WC;

+ if (res->b.b.flags & R600_RESOURCE_FLAG_READ_ONLY)
+ res->flags |= RADEON_FLAG_READ_ONLY;
+
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
res->max_forced_staging_uploads = 0;
res->b.max_forced_staging_uploads = 0;

if (res->domains & RADEON_DOMAIN_VRAM) {
res->vram_usage = size;

res->max_forced_staging_uploads =
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 498a741..d1fdea0 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -46,20 +46,21 @@

struct u_log_context;
struct si_screen;
struct si_context;

#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
#define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
+#define R600_RESOURCE_FLAG_READ_ONLY (PIPE_RESOURCE_FLAG_DRV_PRIV << 5)

/* Debug flags. */
enum {
/* Shader logging options: */
DBG_VS = PIPE_SHADER_VERTEX,
DBG_PS = PIPE_SHADER_FRAGMENT,
DBG_GS = PIPE_SHADER_GEOMETRY,
DBG_TCS = PIPE_SHADER_TESS_CTRL,
DBG_TES = PIPE_SHADER_TESS_EVAL,
DBG_CS = PIPE_SHADER_COMPUTE,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 5d7837d..676d199 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -822,20 +822,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
(sscreen->info.family == CHIP_STONEY ||
sscreen->info.family == CHIP_RAVEN);
}

sscreen->dcc_msaa_allowed =
!(sscreen->debug_flags & DBG(NO_DCC_MSAA)) &&
(sscreen->debug_flags & DBG(DCC_MSAA) ||
sscreen->info.chip_class == VI);

+ sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
+
(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
sscreen->use_monolithic_shaders =
(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;

sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_INV_VMEM_L1;
if (sscreen->info.chip_class <= VI) {
sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 7a09937..3a959f9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -116,20 +116,21 @@ struct si_screen {
bool dpbb_allowed;
bool dfsm_allowed;
bool llvm_has_working_vgpr_indexing;

/* Whether shaders are monolithic (1-part) or separate (3-part). */
bool use_monolithic_shaders;
bool record_llvm_ir;
bool has_rbplus; /* if RB+ registers exist */
bool rbplus_allowed; /* if RB+ is allowed */
bool dcc_msaa_allowed;
+ bool cpdma_prefetch_writes_memory;

struct slab_parent_pool pool_transfers;

/* Texture filter settings. */
int force_aniso; /* -1 = disabled */

/* Auxiliary context. Mainly used to initialize resources.
* It must be locked prior to using and flushed before unlocking. */
struct pipe_context *aux_context;
mtx_t aux_context_lock;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6a1293b..5da9ec0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5050,23 +5050,26 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)

assert(!prolog || !prolog->rodata_size);
assert(!previous_stage || !previous_stage->rodata_size);
assert(!prolog2 || !prolog2->rodata_size);
assert((!prolog && !previous_stage && !prolog2 && !epilog) ||
!mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);

r600_resource_reference(&shader->bo, NULL);
shader->bo = (struct r600_resource*)
- pipe_buffer_create(&sscreen->b, 0,
- PIPE_USAGE_IMMUTABLE,
- align(bo_size, SI_CPDMA_ALIGNMENT));
+ si_aligned_buffer_create(&sscreen->b,
+ sscreen->cpdma_prefetch_writes_memory ?
+ 0 : R600_RESOURCE_FLAG_READ_ONLY,
+ PIPE_USAGE_IMMUTABLE,
+ align(bo_size, SI_CPDMA_ALIGNMENT),
+ 256);
if (!shader->bo)
return -ENOMEM;

/* Upload. */
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
PIPE_TRANSFER_UNSYNCHRONIZED);

/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
* endian-independent. */
--
2.7.4
Marek Olšák
2017-12-05 19:05:58 UTC
Permalink
From: Marek Olšák <***@amd.com>

and anything that clones these uploaders, like u_threaded_context.
---
src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9090e65..9e45a9f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -438,26 +438,29 @@ bool si_common_context_init(struct r600_common_context *rctx,
return false;
}

rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, sscreen->info.gart_page_size,
0, PIPE_USAGE_DEFAULT, 0, true);
if (!rctx->allocator_zeroed_memory)
return false;

rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
- 0, PIPE_USAGE_STREAM, 0);
+ 0, PIPE_USAGE_STREAM,
+ R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.stream_uploader)
return false;

rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
- 0, PIPE_USAGE_DEFAULT, 0);
+ 0, PIPE_USAGE_DEFAULT,
+ sscreen->cpdma_prefetch_writes_memory ?
+ 0 : R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.const_uploader)
return false;

rctx->cached_gtt_allocator = u_upload_create(&rctx->b, 16 * 1024,
0, PIPE_USAGE_STAGING, 0);
if (!rctx->cached_gtt_allocator)
return false;

rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
--
2.7.4
Dieter Nützel
2017-12-06 08:59:35 UTC
Permalink
For the series:

Tested-by: Dieter Nützel <***@nuetzel-hh.de>

Dieter
Post by Marek Olšák
and anything that clones these uploaders, like u_threaded_context.
---
src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9090e65..9e45a9f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -438,26 +438,29 @@ bool si_common_context_init(struct
r600_common_context *rctx,
return false;
}
rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, sscreen->info.gart_page_size,
0, PIPE_USAGE_DEFAULT, 0, true);
if (!rctx->allocator_zeroed_memory)
return false;
rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
- 0, PIPE_USAGE_STREAM, 0);
+ 0, PIPE_USAGE_STREAM,
+ R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.stream_uploader)
return false;
rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
- 0, PIPE_USAGE_DEFAULT, 0);
+ 0, PIPE_USAGE_DEFAULT,
+ sscreen->cpdma_prefetch_writes_memory ?
+ 0 : R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.const_uploader)
return false;
rctx->cached_gtt_allocator = u_upload_create(&rctx->b, 16 * 1024,
0, PIPE_USAGE_STAGING, 0);
if (!rctx->cached_gtt_allocator)
return false;
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
Nicolai Hähnle
2017-12-06 11:36:53 UTC
Permalink
Post by Marek Olšák
and anything that clones these uploaders, like u_threaded_context.
---
src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9090e65..9e45a9f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -438,26 +438,29 @@ bool si_common_context_init(struct r600_common_context *rctx,
return false;
}
rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, sscreen->info.gart_page_size,
0, PIPE_USAGE_DEFAULT, 0, true);
if (!rctx->allocator_zeroed_memory)
return false;
rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
- 0, PIPE_USAGE_STREAM, 0);
+ 0, PIPE_USAGE_STREAM,
+ R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.stream_uploader)
return false;
rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
- 0, PIPE_USAGE_DEFAULT, 0);
+ 0, PIPE_USAGE_DEFAULT,
+ sscreen->cpdma_prefetch_writes_memory ?
+ 0 : R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.const_uploader)
return false;
rctx->cached_gtt_allocator = u_upload_create(&rctx->b, 16 * 1024,
0, PIPE_USAGE_STAGING, 0);
if (!rctx->cached_gtt_allocator)
return false;
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
Nicolai Hähnle
2017-12-06 11:34:34 UTC
Permalink
Post by Marek Olšák
---
src/gallium/drivers/radeon/r600_texture.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..07f7c33 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -739,22 +739,26 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
stride = rtex->surface.u.gfx9.surf_pitch *
rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
} else {
offset = rtex->surface.u.legacy.level[0].offset;
stride = rtex->surface.u.legacy.level[0].nblk_x *
rtex->surface.bpe;
slice_size = (uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
}
} else {
+ /* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a non-suballocated allocation. */
- if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+ if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+ /* A DMABUF export always fails if the BO is local. */
+ (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
I still don't think this is right. Or at least, it's bound to blow up in
our faces at some point. Though I think we may have talked past each
other, apologies that I haven't made myself clear.

The issues I have in mind are scenarios like this:

1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer exported as an FD <-- at this point, the OpenGL and OpenCL
buffers go out of sync because OpenGL re-allocates the buffer but OpenCL
isn't informed.

Or:

1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer attempted to be exported as an FD from OpenCL <-- fails
because the buffer is local (has NO_INTERPROCESS_SHARING), and people
will be utterly clueless as to what's going on.

FWIW, I think the patch is good if you drop the whandle->type check so
that we re-allocate unconditionally.

Cheers,
Nicolai
Post by Marek Olšák
assert(!res->b.is_shared);
/* Allocate a new buffer with PIPE_BIND_SHARED. */
struct pipe_resource templ = res->b.b;
templ.bind |= PIPE_BIND_SHARED;
struct pipe_resource *newb =
screen->resource_create(screen, &templ);
if (!newb)
return false;
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
Marek Olšák
2017-12-06 12:43:43 UTC
Permalink
Post by Marek Olšák
---
src/gallium/drivers/radeon/r600_texture.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..07f7c33 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -739,22 +739,26 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
stride = rtex->surface.u.gfx9.surf_pitch *
rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
} else {
offset = rtex->surface.u.legacy.level[0].offset;
stride = rtex->surface.u.legacy.level[0].nblk_x *
rtex->surface.bpe;
slice_size = (uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw
* 4;
}
} else {
+ /* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a non-suballocated allocation. */
- if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+ if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+ /* A DMABUF export always fails if the BO is local. */
+ (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING
&&
+ whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
I still don't think this is right. Or at least, it's bound to blow up in
our faces at some point. Though I think we may have talked past each other,
apologies that I haven't made myself clear.

The issues I have in mind are scenarios like this:

1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer exported as an FD <-- at this point, the OpenGL and OpenCL
buffers go out of sync because OpenGL re-allocates the buffer but OpenCL
isn't informed.

Or:

1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer attempted to be exported as an FD from OpenCL <-- fails because
the buffer is local (has NO_INTERPROCESS_SHARING), and people will be
utterly clueless as to what's going on.

FWIW, I think the patch is good if you drop the whandle->type check so that
we re-allocate unconditionally.


I can remove the check, but buffers are only exported as DMABUF. This patch
isn't just random - it does fix OpenCL interop.

Marek



Cheers,
Nicolai



assert(!res->b.is_shared);
Post by Marek Olšák
/* Allocate a new buffer with PIPE_BIND_SHARED. */
struct pipe_resource templ = res->b.b;
templ.bind |= PIPE_BIND_SHARED;
struct pipe_resource *newb =
screen->resource_create(screen, &templ);
if (!newb)
return false;
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
Nicolai Hähnle
2017-12-06 15:52:07 UTC
Permalink
---
  src/gallium/drivers/radeon/r600_texture.c | 6 +++++-
  1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..07f7c33 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -739,22 +739,26 @@ static boolean
r600_texture_get_handle(struct pipe_screen* screen,
                        stride = rtex->surface.u.gfx9.surf_pitch *
                                 rtex->surface.bpe;
                        slice_size =
rtex->surface.u.gfx9.surf_slice_size;
                } else {
                        offset =
rtex->surface.u.legacy.level[0].offset;
                        stride =
rtex->surface.u.legacy.level[0].nblk_x *
                                 rtex->surface.bpe;
                        slice_size =
(uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
                }
        } else {
+               /* Buffer exports are for the OpenCL interop. */
                /* Move a suballocated buffer into a
non-suballocated allocation. */
-               if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+               if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+                   /* A DMABUF export always fails if the BO is
local. */
+                   (rtex->resource.flags &
RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+                    whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
I still don't think this is right. Or at least, it's bound to blow
up in our faces at some point. Though I think we may have talked
past each other, apologies that I haven't made myself clear.
1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer exported as an FD <-- at this point, the OpenGL and OpenCL
buffers go out of sync because OpenGL re-allocates the buffer but
OpenCL isn't informed.
1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer attempted to be exported as an FD from OpenCL <-- fails
because the buffer is local (has NO_INTERPROCESS_SHARING), and
people will be utterly clueless as to what's going on.
FWIW, I think the patch is good if you drop the whandle->type check
so that we re-allocate unconditionally.
I can remove the check, but buffers are only exported as DMABUF. This
patch isn't just random - it does fix OpenCL interop.
Well, I doubt it's critical for performance, OpenCL doesn't know about
the NO_INTERPROCESS_SHARING status anyway. So I'd prefer to remove the
check, in case we change the exporting at some point.

Cheers,
Nicolai
Marek
Cheers,
Nicolai
                        assert(!res->b.is_shared);
                        /* Allocate a new buffer with
PIPE_BIND_SHARED. */
                        struct pipe_resource templ = res->b.b;
                        templ.bind |= PIPE_BIND_SHARED;
                        struct pipe_resource *newb =
                                screen->resource_create(screen,
&templ);
                        if (!newb)
                                return false;
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
Emil Velikov
2017-12-06 16:08:24 UTC
Permalink
Post by Marek Olšák
---
src/gallium/drivers/radeon/r600_texture.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..07f7c33 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -739,22 +739,26 @@ static boolean
r600_texture_get_handle(struct pipe_screen* screen,
stride = rtex->surface.u.gfx9.surf_pitch *
rtex->surface.bpe;
slice_size =
rtex->surface.u.gfx9.surf_slice_size;
} else {
offset =
rtex->surface.u.legacy.level[0].offset;
stride =
rtex->surface.u.legacy.level[0].nblk_x *
rtex->surface.bpe;
slice_size =
(uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
}
} else {
+ /* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a
non-suballocated allocation. */
- if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+ if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+ /* A DMABUF export always fails if the BO is
local. */
+ (rtex->resource.flags &
RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
I still don't think this is right. Or at least, it's bound to blow
up in our faces at some point. Though I think we may have talked
past each other, apologies that I haven't made myself clear.
1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer exported as an FD <-- at this point, the OpenGL and OpenCL
buffers go out of sync because OpenGL re-allocates the buffer but
OpenCL isn't informed.
1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer attempted to be exported as an FD from OpenCL <-- fails
because the buffer is local (has NO_INTERPROCESS_SHARING), and
people will be utterly clueless as to what's going on.
FWIW, I think the patch is good if you drop the whandle->type check
so that we re-allocate unconditionally.
I can remove the check, but buffers are only exported as DMABUF. This
patch isn't just random - it does fix OpenCL interop.
Well, I doubt it's critical for performance, OpenCL doesn't know about the
NO_INTERPROCESS_SHARING status anyway. So I'd prefer to remove the check, in
case we change the exporting at some point.
Humble request: please include the highlights of the discussion in the
commit message ;-)

Thanks
Emil
Marek Olšák
2017-12-06 16:18:29 UTC
Permalink
Post by Emil Velikov
Post by Marek Olšák
---
src/gallium/drivers/radeon/r600_texture.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..07f7c33 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -739,22 +739,26 @@ static boolean
r600_texture_get_handle(struct pipe_screen* screen,
stride = rtex->surface.u.gfx9.surf_pitch *
rtex->surface.bpe;
slice_size =
rtex->surface.u.gfx9.surf_slice_size;
} else {
offset =
rtex->surface.u.legacy.level[0].offset;
stride =
rtex->surface.u.legacy.level[0].nblk_x *
rtex->surface.bpe;
slice_size =
(uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
}
} else {
+ /* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a
non-suballocated allocation. */
- if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+ if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+ /* A DMABUF export always fails if the BO is
local. */
+ (rtex->resource.flags &
RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
I still don't think this is right. Or at least, it's bound to blow
up in our faces at some point. Though I think we may have talked
past each other, apologies that I haven't made myself clear.
1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer exported as an FD <-- at this point, the OpenGL and OpenCL
buffers go out of sync because OpenGL re-allocates the buffer but
OpenCL isn't informed.
1. Buffer allocated in OpenGL.
2. Buffer exported as KMS handle for importing to OpenCL in the same
process.
3. Buffer attempted to be exported as an FD from OpenCL <-- fails
because the buffer is local (has NO_INTERPROCESS_SHARING), and
people will be utterly clueless as to what's going on.
FWIW, I think the patch is good if you drop the whandle->type check
so that we re-allocate unconditionally.
I can remove the check, but buffers are only exported as DMABUF. This
patch isn't just random - it does fix OpenCL interop.
Well, I doubt it's critical for performance, OpenCL doesn't know about the
NO_INTERPROCESS_SHARING status anyway. So I'd prefer to remove the check, in
case we change the exporting at some point.
Humble request: please include the highlights of the discussion in the
commit message ;-)
Sorry, I just pushed the patches.

Marek

Loading...