[Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining

Samuel Pitoiset samuel.pitoiset at gmail.com
Fri Apr 20 16:18:16 UTC 2018



On 04/20/2018 03:30 PM, Grazvydas Ignotas wrote:
> On Fri, Apr 20, 2018 at 3:21 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>> The SI family doesn't support chaining which means the maximum
>> size in dwords per CS is limited. When that limit was reached
>> we failed to submit the CS and the application crashed.
>>
>> This patch allows to submit up to 4 IBs which is currently the
>> limit, but recent amdgpu supports more than that.
>>
>> Please note that we can reach the limit of 4 IBs per submit
>> but currently we can't improve that. The only solution is to
>> upgrade libdrm. That will be improved later but for now this
>> should fix crashes on SI or when using RADV_DEBUG=noibs.
>>
>> Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.")
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775
>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>> ---
>>   src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++++++++++++++----
>>   1 file changed, 168 insertions(+), 50 deletions(-)
>>
>> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
>> index c4b2232ce9e..17e6d8ba2b6 100644
>> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
>> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
>> @@ -68,6 +68,10 @@ struct radv_amdgpu_cs {
>>          struct radeon_winsys_bo     **virtual_buffers;
>>          uint8_t                     *virtual_buffer_priorities;
>>          int                         *virtual_buffer_hash_table;
>> +
>> +       /* For chips that don't support chaining. */
>> +       struct radeon_winsys_cs     *old_cs_buffers;
>> +       unsigned                    num_old_cs_buffers;
>>   };
>>
>>   static inline struct radv_amdgpu_cs *
>> @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
>>          for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
>>                  cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
>>
>> +       for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
>> +               struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
>> +               free(rcs->buf);
>> +       }
>> +
>> +       free(cs->old_cs_buffers);
>>          free(cs->old_ib_buffers);
>>          free(cs->virtual_buffers);
>>          free(cs->virtual_buffer_priorities);
>> @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
>>                  /* The total ib size cannot exceed limit_dws dwords. */
>>                  if (ib_dws > limit_dws)
>>                  {
>> -                       cs->failed = true;
>> +                       /* The maximum size in dwords has been reached,
>> +                        * try to allocate a new one.
>> +                        */
>> +                       if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
>> +                               /* TODO: Allow to submit more than 4 IBs. */
>> +                               fprintf(stderr, "amdgpu: Maximum number of IBs "
>> +                                               "per submit reached.\n");
>> +                               cs->failed = true;
>> +                               cs->base.cdw = 0;
>> +                               return;
>> +                       }
>> +
>> +                       cs->old_cs_buffers =
>> +                               realloc(cs->old_cs_buffers,
>> +                                       (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
>> +                       if (!cs->old_cs_buffers) {
> 
> leaking previous cs->old_cs_buffers memory here.

I don't think that matters much because if we fail here, the whole 
application will crash.

I have pushed the patch with the two cosmetic changes.

Thanks!

> 
>> +                               cs->failed = true;
>> +                               cs->base.cdw = 0;
>> +                               return;
>> +                       }
>> +
>> +                       /* Store the current one for submitting it later. */
>> +                       cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
>> +                       cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
>> +                       cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
>> +                       cs->num_old_cs_buffers++;
>> +
>> +                       /* Reset the cs, it will be re-allocated below. */
>>                          cs->base.cdw = 0;
>> -                       return;
>> +                       cs->base.buf = NULL;
>> +
>> +                       /* Re-compute the number of dwords to allocate. */
>> +                       ib_dws = MAX2(cs->base.cdw + min_size,
>> +                                     MIN2(cs->base.max_dw * 2, limit_dws));
>> +                       if (ib_dws > limit_dws) {
>> +                               fprintf(stderr, "amdgpu: Too high number of "
>> +                                               "dwords to allocate\n");
>> +                               cs->failed = true;
>> +                               return;
>> +                       }
>>                  }
>>
>>                  uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
>> @@ -400,6 +447,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
>>                  cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
>>                  cs->ib_size_ptr = &cs->ib.size;
>>                  cs->ib.size = 0;
>> +       } else {
>> +               for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
>> +                       struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
>> +                       free(rcs->buf);
>> +               }
>> +
>> +               free(cs->old_cs_buffers);
>> +               cs->old_cs_buffers = NULL;
>> +               cs->num_old_cs_buffers = 0;
>>          }
>>   }
>>
>> @@ -550,7 +606,8 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
>>   static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
>>                                        struct radeon_winsys_cs **cs_array,
>>                                        unsigned count,
>> -                                     struct radv_amdgpu_winsys_bo *extra_bo,
>> +                                     struct radv_amdgpu_winsys_bo **extra_bo_array,
>> +                                     unsigned num_extra_bo,
>>                                        struct radeon_winsys_cs *extra_cs,
>>                                        const struct radv_winsys_bo_list *radv_bo_list,
>>                                        amdgpu_bo_list_handle *bo_list)
>> @@ -580,7 +637,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
>>                                            bo_list);
>>                  free(handles);
>>                  pthread_mutex_unlock(&ws->global_bo_list_lock);
>> -       } else if (count == 1 && !extra_bo && !extra_cs && !radv_bo_list &&
>> +       } else if (count == 1 && !num_extra_bo && !extra_cs && !radv_bo_list &&
>>                     !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
>>                  struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
>>                  if (cs->num_buffers == 0) {
>> @@ -590,8 +647,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
>>                  r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
>>                                            cs->priorities, bo_list);
>>          } else {
>> -               unsigned total_buffer_count = !!extra_bo;
>> -               unsigned unique_bo_count = !!extra_bo;
>> +               unsigned total_buffer_count = num_extra_bo;
>> +               unsigned unique_bo_count = num_extra_bo;
>>                  for (unsigned i = 0; i < count; ++i) {
>>                          struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
>>                          total_buffer_count += cs->num_buffers;
>> @@ -619,9 +676,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
>>                          return -ENOMEM;
>>                  }
>>
>> -               if (extra_bo) {
>> -                       handles[0] = extra_bo->bo;
>> -                       priorities[0] = 8;
>> +               for (unsigned i = 0; i < num_extra_bo; i++) {
>> +                       handles[i] = extra_bo_array[i]->bo;
>> +                       priorities[i] = 8;
>>                  }
>>
>>                  for (unsigned i = 0; i < count + !!extra_cs; ++i) {
>> @@ -773,7 +830,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
>>                  }
>>          }
>>
>> -       r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs,
>> +       r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs,
>>                                         radv_bo_list, &bo_list);
>>          if (r) {
>>                  fprintf(stderr, "amdgpu: buffer list creation failed for the "
>> @@ -842,7 +899,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
>>
>>                  memset(&request, 0, sizeof(request));
>>
>> -               r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
>> +               r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
>>                                                 preamble_cs, radv_bo_list, &bo_list);
>>                  if (r) {
>>                          fprintf(stderr, "amdgpu: buffer list creation failed "
>> @@ -923,68 +980,127 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
>>          assert(cs_count);
>>
>>          for (unsigned i = 0; i < cs_count;) {
>> -               struct amdgpu_cs_ib_info ib = {0};
>> -               struct radeon_winsys_bo *bo = NULL;
>> +               struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
>> +               unsigned number_of_ibs = 1;
>> +               struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
>>                  struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
>> +               struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
>>                  uint32_t *ptr;
>>                  unsigned cnt = 0;
>>                  unsigned size = 0;
>>                  unsigned pad_words = 0;
>> -               if (preamble_cs)
>> -                       size += preamble_cs->cdw;
>>
>> -               while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
>> -                       size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
>> -                       ++cnt;
>> -               }
>> +               if (cs->num_old_cs_buffers > 0) {
>> +                       /* Special path when the maximum size in dwords has
>> +                        * been reached because we need to handle more than one
>> +                        * IB per submit.
>> +                        */
>> +                       unsigned new_cs_count = cs->num_old_cs_buffers + 1;
>> +                       struct radeon_winsys_cs *new_cs_array[4];
> 
> new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT] ?
> 
>> +                       unsigned idx = 0;
>> +
>> +                       for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
>> +                               new_cs_array[idx++] = &cs->old_cs_buffers[j];
>> +                       new_cs_array[idx++] = cs_array[i];
>> +
>> +                       for (unsigned j = 0; j < new_cs_count; j++) {
>> +                               struct radeon_winsys_cs *rcs = new_cs_array[j];
>> +                               bool needs_preamble = preamble_cs && j == 0;
>> +                               unsigned size = 0;
>> +
>> +                               if (needs_preamble)
>> +                                       size += preamble_cs->cdw;
>> +                               size += rcs->cdw;
>> +
>> +                               assert(size < 0xffff8);
>> +
>> +                               while(!size || (size & 7)) {
> 
> Since you touch this code, add a space after while?
> 
> GraÅžvydas
> 


More information about the mesa-dev mailing list