[PATCH 1/2 v5] mieq: Provide better adaptability and diagnostics during mieq overflow

Peter Hutterer peter.hutterer at who-t.net
Mon Oct 17 16:58:51 PDT 2011


On Sun, Oct 16, 2011 at 10:14:31PM -0700, Jeremy Huddleston wrote:
> This patch changes from a static length event queue (512) to one that
> starts at 128 and grows to 4096 as it overflows, logging each time it
> grows.
> 
> This change also allows for multiple backtraces to be printed when the
> server is wedged rather than just one.  This increased sampling should
> help identify the true hog in cases where one backtrace might be
> insufficient.
> 
> Signed-off-by: Jeremy Huddleston <jeremyhu at apple.com>
> ---
> Ok ... 5th time's the charm?
> Responded to Keith's recent comments
> 
>  mi/mieq.c |  174 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
>  1 files changed, 138 insertions(+), 36 deletions(-)
> 
> diff --git a/mi/mieq.c b/mi/mieq.c
> index b75bde9..8569583 100644
> --- a/mi/mieq.c
> +++ b/mi/mieq.c
> @@ -59,7 +59,11 @@ in this Software without prior written authorization from The Open Group.
>  # include <X11/extensions/dpmsconst.h>
>  #endif
>  
> -#define QUEUE_SIZE  512
> +/* Queue size must be a power of 2 */
> +#define QUEUE_INITIAL_SIZE                 128
> +#define QUEUE_MAXIMUM_SIZE                4096
> +#define QUEUE_DROP_BACKTRACE_FREQUENCY     100
> +#define QUEUE_DROP_BACKTRACE_MAX            10
>  
>  #define EnqueueScreen(dev) dev->spriteInfo->sprite->pEnqueueScreen
>  #define DequeueScreen(dev) dev->spriteInfo->sprite->pDequeueScreen
> @@ -74,7 +78,9 @@ typedef struct _EventQueue {
>      HWEventQueueType head, tail;         /* long for SetInputCheck */
>      CARD32           lastEventTime;      /* to avoid time running backwards */
>      int              lastMotion;         /* device ID if last event motion? */
> -    EventRec         events[QUEUE_SIZE]; /* static allocation for signals */
> +    EventRec         *events;            /* our queue as an array */
> +    size_t           nevents;            /* the number of buckets in our queue */
> +    size_t           dropped;            /* counter for number of consecutive dropped events */
>      mieqHandler      handlers[128];      /* custom event handler */
>  } EventQueueRec, *EventQueuePtr;
>  
> @@ -99,25 +105,87 @@ static inline void wait_for_server_init(void) {
>  }
>  #endif
>  
> +/* Pre-condition: Called with miEventQueueMutex held */
> +static Bool
> +mieqGrowQueue(EventQueuePtr eventQueue, size_t new_nevents) {
> +    size_t i, n_enqueued, first_hunk;
> +    EventRec *new_events;
> +
> +    if (new_nevents <= eventQueue->nevents)
> +        return FALSE;
> +
> +    if (!eventQueue) {
> +        ErrorF("[mi] mieqGrowQueue called with a NULL eventQueue\n");
> +        return FALSE;
> +    }

this condition needs to be swapped with the one above it, otherwise you get
a NULL dereference

> +
> +    if (CountBits((const uint8_t *)&new_nevents, sizeof(new_nevents) != 1)) {

this looks like a misplacement of a parens. Plus, this requires a comment as
for the allowed values for nevents. Just add some documentation to
mieqGrowQueue(), it would make reading the code a bit easier.

> +        ErrorF("[mi] mieqGrowQueue called with an invalid size: %lu\n", new_nevents);
> +        return FALSE;
> +    }
> +
> +    new_events = calloc(new_nevents, sizeof(EventRec));
> +    if (new_events == NULL) {
> +    	ErrorF("[mi] mieqGrowQueue memory allocation error.\n");
> +        return FALSE;

indentation seems busted here.


> +    }
> +
> +    if (eventQueue->nevents) {
> +        /* % is not well-defined with negative numbers... sigh */
> +        n_enqueued = miEventQueue.tail - miEventQueue.head + eventQueue->nevents;

shouldn't this be using eventQueue?

> +        if (n_enqueued >= eventQueue->nevents)
> +            n_enqueued -= eventQueue->nevents;
> +    } else { 
> +        n_enqueued = 0;
> +    }
> +
> +    /* We block signals, so an mieqEnqueue triggered by SIGIO does not
> +     * write to our queue as we are modifying it.
> +     */
> +    OsBlockSignals();
> +
> +    /* First copy the existing events */
> +    first_hunk = eventQueue->nevents - eventQueue->head;
> +    memcpy(new_events,
> +           &eventQueue->events[miEventQueue.head],

shouldn't this be using eventQueue?

_please_ write a test. It's a self-contained function and these
types of errors are found very easily with a few simple for loops and
assert.

> +           first_hunk * sizeof(EventRec));
> +    memcpy(&new_events[first_hunk],
> +           eventQueue->events,
> +           (eventQueue->head) * sizeof(EventRec));

the () aren't necessary here.

> +
> +    /* Initialize the new portion */
> +    for (i = eventQueue->nevents; i < new_nevents; i++) {
> +        InternalEvent* evlist = InitEventList(1);
> +        if (!evlist) {
> +            size_t j;
> +            for (j = 0; j < i; j++)
> +                FreeEventList(new_events[j].events, 1);
> +            free(new_events);
> +            OsReleaseSignals();
> +            return FALSE;
> +        }
> +        new_events[i].events = evlist;
> +    }
> +
> +    /* And update our record */
> +    miEventQueue.tail = n_enqueued;
> +    miEventQueue.head = 0;
> +    miEventQueue.nevents = new_nevents;
> +    free(miEventQueue.events);
> +    miEventQueue.events = new_events;
> +
> +    OsReleaseSignals();
> +    return TRUE;
> +}
> +
>  Bool
>  mieqInit(void)
>  {
> -    int i;
> -
> -    miEventQueue.head = miEventQueue.tail = 0;
> +    memset(&miEventQueue, 0, sizeof(miEventQueue));
>      miEventQueue.lastEventTime = GetTimeInMillis ();
> -    miEventQueue.lastMotion = FALSE;
> -    for (i = 0; i < 128; i++)
> -        miEventQueue.handlers[i] = NULL;
> -    for (i = 0; i < QUEUE_SIZE; i++)
> -    {
> -	if (miEventQueue.events[i].events == NULL) {
> -	    InternalEvent* evlist = InitEventList(1);
> -	    if (!evlist)
> -		FatalError("Could not allocate event queue.\n");
> -	    miEventQueue.events[i].events = evlist;
> -	}
> -    }
> +
> +    if(!mieqGrowQueue(&miEventQueue, QUEUE_INITIAL_SIZE))
> +	FatalError("Could not allocate event queue.\n");
>  
>      SetInputCheck(&miEventQueue.head, &miEventQueue.tail);
>      return TRUE;
> @@ -127,13 +195,14 @@ void
>  mieqFini(void)
>  {
>      int i;
> -    for (i = 0; i < QUEUE_SIZE; i++)
> +    for (i = 0; i < miEventQueue.nevents; i++)
>      {
>  	if (miEventQueue.events[i].events != NULL) {
>  	    FreeEventList(miEventQueue.events[i].events, 1);
>  	    miEventQueue.events[i].events = NULL;
>  	}
>      }
> +    free(miEventQueue.events);
>  }
>  
>  /*
> @@ -157,6 +226,20 @@ mieqEnqueue(DeviceIntPtr pDev, InternalEvent *e)
>      pthread_mutex_lock(&miEventQueueMutex);
>  #endif
>  
> +    if (!miEventQueue.nevents) {
> +        /* The event queue is being resized due to previous overflow.
> +         * This event will be dropped on the floor.
> +         */
> +        if (!miEventQueue.dropped) {
> +            ErrorF("[mi] Event discarded during EQ resize.\n");
> +        }
> +        miEventQueue.dropped++;
> +#ifdef XQUARTZ
> +        pthread_mutex_unlock(&miEventQueueMutex);
> +#endif
> +        return;
> +    }
> +
>      verify_internal_event(e);
>  
>      /* avoid merging events from different devices */
> @@ -165,26 +248,31 @@ mieqEnqueue(DeviceIntPtr pDev, InternalEvent *e)
>  
>      if (isMotion && isMotion == miEventQueue.lastMotion &&
>          oldtail != miEventQueue.head) {
> -        oldtail = (oldtail - 1) % QUEUE_SIZE;
> -    }
> -    else {
> -        static int stuck = 0;
> +        oldtail = (oldtail - 1) % miEventQueue.nevents;
> +    } else if (((oldtail + 1) % miEventQueue.nevents) == miEventQueue.head) {
>          /* Toss events which come in late.  Usually this means your server's
>           * stuck in an infinite loop somewhere, but SIGIO is still getting
> -         * handled. */
> -        if (((oldtail + 1) % QUEUE_SIZE) == miEventQueue.head) {
> -            if (!stuck) {
> -                ErrorF("[mi] EQ overflowing. The server is probably stuck "
> -                        "in an infinite loop.\n");
> -                xorg_backtrace();
> -                stuck = 1;
> +         * handled.
> +         */
> +        miEventQueue.dropped++;
> +        if (miEventQueue.dropped == 1) {
> +            ErrorF("[mi] EQ overflowing.  Additional events will be discarded until existing events are processed.\n");
> +            xorg_backtrace();
> +            ErrorF("[mi] These backtraces from mieqEnqueue may point to a culprit higher up the stack.\n");
> +            ErrorF("[mi] mieq is *NOT* the cause.  It is a victim.\n");
> +        } else if (miEventQueue.dropped %  QUEUE_DROP_BACKTRACE_FREQUENCY == 0 &&
> +                   miEventQueue.dropped /  QUEUE_DROP_BACKTRACE_FREQUENCY <= QUEUE_DROP_BACKTRACE_MAX) {
> +            ErrorF("[mi] EQ overflow continuing.  %lu events have been dropped.\n", miEventQueue.dropped);
> +            if (miEventQueue.dropped /  QUEUE_DROP_BACKTRACE_FREQUENCY == QUEUE_DROP_BACKTRACE_MAX) {
> +                ErrorF("[mi] No further overflow reports will be reported until the clog is cleared.\n");
>              }
> +            xorg_backtrace();
> +        }
> +
>  #ifdef XQUARTZ
> -            pthread_mutex_unlock(&miEventQueueMutex);
> +        pthread_mutex_unlock(&miEventQueueMutex);
>  #endif
> -	        return;
> -        }
> -        stuck = 0;
> +        return;
>      }
>  
>      evlen = e->any.length;
> @@ -203,7 +291,7 @@ mieqEnqueue(DeviceIntPtr pDev, InternalEvent *e)
>      miEventQueue.events[oldtail].pDev = pDev;
>  
>      miEventQueue.lastMotion = isMotion;
> -    miEventQueue.tail = (oldtail + 1) % QUEUE_SIZE;
> +    miEventQueue.tail = (oldtail + 1) % miEventQueue.nevents;
>  #ifdef XQUARTZ
>      pthread_mutex_unlock(&miEventQueueMutex);
>  #endif
> @@ -441,7 +529,21 @@ mieqProcessInputEvents(void)
>  #ifdef XQUARTZ
>      pthread_mutex_lock(&miEventQueueMutex);
>  #endif
> -    
> +
> +    if ((miEventQueue.tail - miEventQueue.head) % miEventQueue.nevents >= (miEventQueue.nevents >> 1) &&

Wouldn't this suffer from the same issue regarding % on negative numbers?

also, please put comments in here. I had to stare at this for too long to
understand what you're doing here. a simple "grow queue when we have more
than half the queue size queued up" would have made this a bit easier.

(I also don't know why >> 1 is preferable to /2. Not really critical path
here anyway and /2 or * 2 would be much more obvious to read)

Cheers,
  Peter


> +        miEventQueue.nevents < QUEUE_MAXIMUM_SIZE) {
> +        ErrorF("[mi] Increasing EQ size to %lu to prevent dropped events.\n", miEventQueue.nevents << 1);
> +        if (!mieqGrowQueue(&miEventQueue, miEventQueue.nevents << 1)) {
> +            ErrorF("[mi] Increasing the size of EQ failed.\n");
> +        }
> +    }
> +
> +    if (miEventQueue.dropped) {
> +        ErrorF("[mi] EQ processing has resumed after %lu dropped events.\n", miEventQueue.dropped);
> +        ErrorF("[mi] This may be caused my a misbehaving driver monopolizing the server's resources.\n");
> +        miEventQueue.dropped = 0;
> +    }
> +
>      while (miEventQueue.head != miEventQueue.tail) {
>          e = &miEventQueue.events[miEventQueue.head];
>  
> @@ -449,7 +551,7 @@ mieqProcessInputEvents(void)
>          dev     = e->pDev;
>          screen  = e->pScreen;
>  
> -        miEventQueue.head = (miEventQueue.head + 1) % QUEUE_SIZE;
> +        miEventQueue.head = (miEventQueue.head + 1) % miEventQueue.nevents;
>  
>  #ifdef XQUARTZ
>          pthread_mutex_unlock(&miEventQueueMutex);
> -- 
> 1.7.6.1
> 
> 
> 
> 
> 


More information about the xorg-devel mailing list