[PATCH 04/20] drm/radeon: convert fence to uint64_t v4
Christian König
deathsimple at vodafone.de
Mon May 7 08:04:37 PDT 2012
On 07.05.2012 16:39, Jerome Glisse wrote:
> On Mon, May 7, 2012 at 7:42 AM, Christian König<deathsimple at vodafone.de> wrote:
>> From: Jerome Glisse<jglisse at redhat.com>
>>
>> This convert fence to use uint64_t sequence number intention is
>> to use the fact that uin64_t is big enough that we don't need to
>> care about wrap around.
>>
>> Tested with and without writeback using 0xFFFFF000 as initial
>> fence sequence and thus allowing to test the wrap around from
>> 32bits to 64bits.
>>
>> v2: Add comment about possible race btw CPU& GPU, add comment
>> stressing that we need 2 dword aligned for R600_WB_EVENT_OFFSET
>> Read fence sequenc in reverse order of GPU write them so we
>> mitigate the race btw CPU and GPU.
>>
>> v3: Drop the need for ring to emit the 64bits fence, and just have
>> each ring emit the lower 32bits of the fence sequence. We
>> handle the wrap over 32bits in fence_process.
>>
>> v4: Just a small optimization: Don't reread the last_seq value
>> if loop restarts, since we already know its value anyway.
>> Also start at zero not one for seq value and use pre instead
>> of post increment in emmit, otherwise wait_empty will deadlock.
> Why changing that v3 was already good no deadlock. I started at 1
> especialy for that, a signaled fence is set to 0 so it always compare
> as signaled. Just using preincrement is exactly like starting at one.
> I don't see the need for this change but if it makes you happy.
Not exactly, the last emitted sequence is also used in
radeon_fence_wait_empty. So when you use post increment
radeon_fence_wait_empty will actually not wait for the last emitted
fence to be signaled, but for last emitted + 1, so it practically waits
forever.
Without this change suspend (for example) will just lockup.
Cheers,
Christian.
>
> Cheers,
> Jerome
>> Signed-off-by: Jerome Glisse<jglisse at redhat.com>
>> Signed-off-by: Christian König<deathsimple at vodafone.de>
>> ---
>> drivers/gpu/drm/radeon/radeon.h | 39 ++++++-----
>> drivers/gpu/drm/radeon/radeon_fence.c | 116 +++++++++++++++++++++++----------
>> drivers/gpu/drm/radeon/radeon_ring.c | 9 ++-
>> 3 files changed, 107 insertions(+), 57 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
>> index e99ea81..cdf46bc 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -100,28 +100,32 @@ extern int radeon_lockup_timeout;
>> * Copy from radeon_drv.h so we don't have to include both and have conflicting
>> * symbol;
>> */
>> -#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
>> -#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2)
>> +#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
>> +#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2)
>> /* RADEON_IB_POOL_SIZE must be a power of 2 */
>> -#define RADEON_IB_POOL_SIZE 16
>> -#define RADEON_DEBUGFS_MAX_COMPONENTS 32
>> -#define RADEONFB_CONN_LIMIT 4
>> -#define RADEON_BIOS_NUM_SCRATCH 8
>> +#define RADEON_IB_POOL_SIZE 16
>> +#define RADEON_DEBUGFS_MAX_COMPONENTS 32
>> +#define RADEONFB_CONN_LIMIT 4
>> +#define RADEON_BIOS_NUM_SCRATCH 8
>>
>> /* max number of rings */
>> -#define RADEON_NUM_RINGS 3
>> +#define RADEON_NUM_RINGS 3
>> +
>> +/* fence seq are set to this number when signaled */
>> +#define RADEON_FENCE_SIGNALED_SEQ 0LL
>> +#define RADEON_FENCE_NOTEMITED_SEQ (~0LL)
>>
>> /* internal ring indices */
>> /* r1xx+ has gfx CP ring */
>> -#define RADEON_RING_TYPE_GFX_INDEX 0
>> +#define RADEON_RING_TYPE_GFX_INDEX 0
>>
>> /* cayman has 2 compute CP rings */
>> -#define CAYMAN_RING_TYPE_CP1_INDEX 1
>> -#define CAYMAN_RING_TYPE_CP2_INDEX 2
>> +#define CAYMAN_RING_TYPE_CP1_INDEX 1
>> +#define CAYMAN_RING_TYPE_CP2_INDEX 2
>>
>> /* hardcode those limit for now */
>> -#define RADEON_VA_RESERVED_SIZE (8<< 20)
>> -#define RADEON_IB_VM_MAX_SIZE (64<< 10)
>> +#define RADEON_VA_RESERVED_SIZE (8<< 20)
>> +#define RADEON_IB_VM_MAX_SIZE (64<< 10)
>>
>> /*
>> * Errata workarounds.
>> @@ -254,8 +258,9 @@ struct radeon_fence_driver {
>> uint32_t scratch_reg;
>> uint64_t gpu_addr;
>> volatile uint32_t *cpu_addr;
>> - atomic_t seq;
>> - uint32_t last_seq;
>> + /* seq is protected by ring emission lock */
>> + uint64_t seq;
>> + atomic64_t last_seq;
>> unsigned long last_activity;
>> wait_queue_head_t queue;
>> struct list_head emitted;
>> @@ -268,11 +273,9 @@ struct radeon_fence {
>> struct kref kref;
>> struct list_head list;
>> /* protected by radeon_fence.lock */
>> - uint32_t seq;
>> - bool emitted;
>> - bool signaled;
>> + uint64_t seq;
>> /* RB, DMA, etc. */
>> - int ring;
>> + unsigned ring;
>> struct radeon_semaphore *semaphore;
>> };
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
>> index 5bb78bf..feb2bbc 100644
>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>> @@ -66,14 +66,14 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
>> unsigned long irq_flags;
>>
>> write_lock_irqsave(&rdev->fence_lock, irq_flags);
>> - if (fence->emitted) {
>> + if (fence->seq&& fence->seq< RADEON_FENCE_NOTEMITED_SEQ) {
>> write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>> return 0;
>> }
>> - fence->seq = atomic_add_return(1,&rdev->fence_drv[fence->ring].seq);
>> + /* we are protected by the ring emission mutex */
>> + fence->seq = ++rdev->fence_drv[fence->ring].seq;
>> radeon_fence_ring_emit(rdev, fence->ring, fence);
>> trace_radeon_fence_emit(rdev->ddev, fence->seq);
>> - fence->emitted = true;
>> /* are we the first fence on a previusly idle ring? */
>> if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
>> rdev->fence_drv[fence->ring].last_activity = jiffies;
>> @@ -87,14 +87,60 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
>> {
>> struct radeon_fence *fence;
>> struct list_head *i, *n;
>> - uint32_t seq;
>> + uint64_t seq, last_seq;
>> + unsigned count_loop = 0;
>> bool wake = false;
>>
>> - seq = radeon_fence_read(rdev, ring);
>> - if (seq == rdev->fence_drv[ring].last_seq)
>> - return false;
>> + /* Note there is a scenario here for an infinite loop but it's
>> + * very unlikely to happen. For it to happen, the current polling
>> + * process need to be interrupted by another process and another
>> + * process needs to update the last_seq btw the atomic read and
>> + * xchg of the current process.
>> + *
>> + * More over for this to go in infinite loop there need to be
>> + * continuously new fence signaled ie radeon_fence_read needs
>> + * to return a different value each time for both the currently
>> + * polling process and the other process that xchg the last_seq
>> + * btw atomic read and xchg of the current process. And the
>> + * value the other process set as last seq must be higher than
>> + * the seq value we just read. Which means that current process
>> + * need to be interrupted after radeon_fence_read and before
>> + * atomic xchg.
>> + *
>> + * To be even more safe we count the number of time we loop and
>> + * we bail after 10 loop just accepting the fact that we might
>> + * have temporarly set the last_seq not to the true real last
>> + * seq but to an older one.
>> + */
>> + last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
>> + do {
>> + seq = radeon_fence_read(rdev, ring);
>> + seq |= last_seq& 0xffffffff00000000LL;
>> + if (seq< last_seq) {
>> + seq += 0x100000000LL;
>> + }
>>
>> - rdev->fence_drv[ring].last_seq = seq;
>> + if (!wake&& seq == last_seq) {
>> + return false;
>> + }
>> + /* If we loop over we don't want to return without
>> + * checking if a fence is signaled as it means that the
>> + * seq we just read is different from the previous on.
>> + */
>> + wake = true;
>> + if ((count_loop++)> 10) {
>> + /* We looped over too many time leave with the
>> + * fact that we might have set an older fence
>> + * seq then the current real last seq as signaled
>> + * by the hw.
>> + */
>> + break;
>> + }
>> + last_seq = seq;
>> + } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq)> seq);
>> +
>> + /* reset wake to false */
>> + wake = false;
>> rdev->fence_drv[ring].last_activity = jiffies;
>>
>> n = NULL;
>> @@ -112,7 +158,7 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
>> n = i->prev;
>> list_move_tail(i,&rdev->fence_drv[ring].signaled);
>> fence = list_entry(i, struct radeon_fence, list);
>> - fence->signaled = true;
>> + fence->seq = RADEON_FENCE_SIGNALED_SEQ;
>> i = n;
>> } while (i !=&rdev->fence_drv[ring].emitted);
>> wake = true;
>> @@ -128,7 +174,7 @@ static void radeon_fence_destroy(struct kref *kref)
>> fence = container_of(kref, struct radeon_fence, kref);
>> write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
>> list_del(&fence->list);
>> - fence->emitted = false;
>> + fence->seq = RADEON_FENCE_NOTEMITED_SEQ;
>> write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
>> if (fence->semaphore)
>> radeon_semaphore_free(fence->rdev, fence->semaphore);
>> @@ -145,9 +191,7 @@ int radeon_fence_create(struct radeon_device *rdev,
>> }
>> kref_init(&((*fence)->kref));
>> (*fence)->rdev = rdev;
>> - (*fence)->emitted = false;
>> - (*fence)->signaled = false;
>> - (*fence)->seq = 0;
>> + (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ;
>> (*fence)->ring = ring;
>> (*fence)->semaphore = NULL;
>> INIT_LIST_HEAD(&(*fence)->list);
>> @@ -163,18 +207,18 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
>> return true;
>>
>> write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
>> - signaled = fence->signaled;
>> + signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ);
>> /* if we are shuting down report all fence as signaled */
>> if (fence->rdev->shutdown) {
>> signaled = true;
>> }
>> - if (!fence->emitted) {
>> + if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
>> WARN(1, "Querying an unemitted fence : %p !\n", fence);
>> signaled = true;
>> }
>> if (!signaled) {
>> radeon_fence_poll_locked(fence->rdev, fence->ring);
>> - signaled = fence->signaled;
>> + signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ);
>> }
>> write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
>> return signaled;
>> @@ -183,8 +227,8 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
>> int radeon_fence_wait(struct radeon_fence *fence, bool intr)
>> {
>> struct radeon_device *rdev;
>> - unsigned long irq_flags, timeout;
>> - u32 seq;
>> + unsigned long irq_flags, timeout, last_activity;
>> + uint64_t seq;
>> int i, r;
>> bool signaled;
>>
>> @@ -207,7 +251,9 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
>> timeout = 1;
>> }
>> /* save current sequence value used to check for GPU lockups */
>> - seq = rdev->fence_drv[fence->ring].last_seq;
>> + seq = atomic64_read(&rdev->fence_drv[fence->ring].last_seq);
>> + /* Save current last activity valuee, used to check for GPU lockups */
>> + last_activity = rdev->fence_drv[fence->ring].last_activity;
>> read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>>
>> trace_radeon_fence_wait_begin(rdev->ddev, seq);
>> @@ -235,24 +281,23 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
>> }
>>
>> write_lock_irqsave(&rdev->fence_lock, irq_flags);
>> - /* check if sequence value has changed since last_activity */
>> - if (seq != rdev->fence_drv[fence->ring].last_seq) {
>> + /* test if somebody else has already decided that this is a lockup */
>> + if (last_activity != rdev->fence_drv[fence->ring].last_activity) {
>> write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>> continue;
>> }
>>
>> - /* change sequence value on all rings, so nobody else things there is a lockup */
>> - for (i = 0; i< RADEON_NUM_RINGS; ++i)
>> - rdev->fence_drv[i].last_seq -= 0x10000;
>> -
>> - rdev->fence_drv[fence->ring].last_activity = jiffies;
>> write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>>
>> if (radeon_ring_is_lockup(rdev, fence->ring,&rdev->ring[fence->ring])) {
>> -
>> /* good news we believe it's a lockup */
>> - printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
>> - fence->seq, seq);
>> + dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
>> + fence->seq, seq);
>> +
>> + /* change last activity so nobody else think there is a lockup */
>> + for (i = 0; i< RADEON_NUM_RINGS; ++i) {
>> + rdev->fence_drv[i].last_activity = jiffies;
>> + }
>>
>> /* mark the ring as not ready any more */
>> rdev->ring[fence->ring].ready = false;
>> @@ -387,9 +432,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
>> }
>> rdev->fence_drv[ring].cpu_addr =&rdev->wb.wb[index/4];
>> rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
>> - radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring]..seq), ring);
>> + radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring);
>> rdev->fence_drv[ring].initialized = true;
>> - DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
>> + DRM_INFO("fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
>> ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
>> write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>> return 0;
>> @@ -400,7 +445,8 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
>> rdev->fence_drv[ring].scratch_reg = -1;
>> rdev->fence_drv[ring].cpu_addr = NULL;
>> rdev->fence_drv[ring].gpu_addr = 0;
>> - atomic_set(&rdev->fence_drv[ring].seq, 0);
>> + rdev->fence_drv[ring].seq = 0;
>> + atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
>> INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
>> INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
>> init_waitqueue_head(&rdev->fence_drv[ring].queue);
>> @@ -458,12 +504,12 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
>> continue;
>>
>> seq_printf(m, "--- ring %d ---\n", i);
>> - seq_printf(m, "Last signaled fence 0x%08X\n",
>> - radeon_fence_read(rdev, i));
>> + seq_printf(m, "Last signaled fence 0x%016lx\n",
>> + atomic64_read(&rdev->fence_drv[i].last_seq));
>> if (!list_empty(&rdev->fence_drv[i].emitted)) {
>> fence = list_entry(rdev->fence_drv[i].emitted.prev,
>> struct radeon_fence, list);
>> - seq_printf(m, "Last emitted fence %p with 0x%08X\n",
>> + seq_printf(m, "Last emitted fence %p with 0x%016llx\n",
>> fence, fence->seq);
>> }
>> }
>> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
>> index a4d60ae..4ae222b 100644
>> --- a/drivers/gpu/drm/radeon/radeon_ring.c
>> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
>> @@ -82,7 +82,7 @@ bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib)
>> bool done = false;
>>
>> /* only free ib which have been emited */
>> - if (ib->fence&& ib->fence->emitted) {
>> + if (ib->fence&& ib->fence->seq< RADEON_FENCE_NOTEMITED_SEQ) {
>> if (radeon_fence_signaled(ib->fence)) {
>> radeon_fence_unref(&ib->fence);
>> radeon_sa_bo_free(rdev,&ib->sa_bo);
>> @@ -149,8 +149,9 @@ retry:
>> /* this should be rare event, ie all ib scheduled none signaled yet.
>> */
>> for (i = 0; i< RADEON_IB_POOL_SIZE; i++) {
>> - if (rdev->ib_pool.ibs[idx].fence&& rdev->ib_pool.ibs[idx].fence->emitted) {
>> - r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false);
>> + struct radeon_fence *fence = rdev->ib_pool.ibs[idx].fence;
>> + if (fence&& fence->seq< RADEON_FENCE_NOTEMITED_SEQ) {
>> + r = radeon_fence_wait(fence, false);
>> if (!r) {
>> goto retry;
>> }
>> @@ -173,7 +174,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
>> return;
>> }
>> radeon_mutex_lock(&rdev->ib_pool.mutex);
>> - if (tmp->fence&& !tmp->fence->emitted) {
>> + if (tmp->fence&& tmp->fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
>> radeon_sa_bo_free(rdev,&tmp->sa_bo);
>> radeon_fence_unref(&tmp->fence);
>> }
>> --
>> 1.7.5.4
>>
More information about the dri-devel
mailing list