[PATCH] dix/dispatch: DoGetImage: Use a single buffer
Mark Kettenis
mark.kettenis at xs4all.nl
Sat Mar 29 11:25:41 PDT 2014
> From: Keith Packard <keithp at keithp.com>
> Date: Sat, 29 Mar 2014 10:38:01 -0700
>
> Daniel Kurtz <djkurtz at chromium.org> writes:
>
> > DoGetImage chops up an image into IMAGE_BUFSIZE strips.
> > This predates commit [0] (a pull from XFree86 to X.Org), at which time
> > IMAGE_BUFSIZE was 8k.
>
> It's worked like this since before R1, which was before the OS layer
> resized output buffers.
>
> > Since we are now allocating from the heap, it should no longer be
> > necessary, or desirable, to chop up the DoGetImage() buffer.
>
> Can you show us performance comparisons so that we can see if it
> helps/hurts? There are additional disasters in this code path which
> make the answer less than obvious from the driver level...
>
> What I always wanted to do was get the OS layer to allocate space for
> the whole image and get the DDX to dump the data right into that, rather
> than allocating a temporary buffer and doing two copies. Something like
>
> void *
> ReserveOutputSpace(ClientPtr client, int size);
>
> However, if you look at the FlushClient function, it doesn't track the
> start of the output data to send to the client, so we're actually
> memmove'ing the data after each write.
>
> So, we should fix that as well; sticking an offset into the output
> buffer, and only memmove'ing the remaining data when the amount left in
> the buffer is a small amount.
Heh, I just happened to be in there yesterday. The memmove'ing gets
incredibly insane if you're trying to move a large image through a
socket with a small socket buffer size (like the 4k that was passed
down to us by the CSRG). A typical 8-megapixel image produced by a
digital camera takes tens of seconds when you send it in 4k chunks and
memmove the remainder of the image after each chunk, with Xorg's CPU
usage going through the roof.
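Back of the envelope: an 8-megapixel image at 32bpp is ~32MB. Sent in
4k chunks that is ~8000 writes, and after each one the remaining data
(~16MB on average) gets memmove'd back to the front of the buffer, so
the server copies on the order of 8000 * 16MB = 128GB just shuffling
its output buffer around. No wonder it takes tens of seconds.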
I tried making the FlushClient() implementation a bit smarter by using
a circular buffer to avoid the memmove'ing. Unfortunately that makes
resizing the output buffer more painful and introduces some additional
memcpy'ing there. I managed to speed things up a bit, but things were
still too slow. See the attached diff (against OpenBSD-current
xenocara, so basically Xorg 1.14.4). Your ReserveOutputSpace() idea
would certainly help here as it would eliminate or at least reduce the
overhead of resizing the circular buffer. Would Daniel's diff have
the same effect?
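For comparison, the offset approach you describe would be something
like the sketch below (untested; the struct and names are invented for
illustration, not the real ConnectionOutput):

    #include <string.h>

    typedef struct {
        unsigned char *buf;
        int size;      /* allocated size */
        int start;     /* offset of first unsent byte */
        int count;     /* bytes pending, starting at 'start' */
    } OutBuf;

    #define SLIDE_THRESHOLD 4096

    /* Consume 'written' bytes from the front of the pending data,
       sliding the tail back to the front only when little remains. */
    static void
    ConsumeOutput(OutBuf *ob, int written)
    {
        ob->start += written;
        ob->count -= written;
        if (ob->count == 0) {
            /* Drained: reset to the front of the buffer. */
            ob->start = 0;
        } else if (ob->count <= SLIDE_THRESHOLD) {
            /* Small tail left: one cheap memmove instead of moving
               megabytes after every partial write. */
            memmove(ob->buf, ob->buf + ob->start, ob->count);
            ob->start = 0;
        }
    }

That avoids both the big memmove and the wrap-around bookkeeping a
circular buffer needs, at the cost of some dead space at the front of
the buffer until it drains.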
I ended up modifying libxtrans to set the socket buffer size to 64k
to fix the performance problem I was investigating. See the diff
below. I guess most Linux systems use a larger default socket buffer
size, in which case the performance problem might not be so
prominent.
> I guess my main question is whether you have a need to improve GetImage
> performance, or whether this is strictly a desire to clean up some
> code which is presumably sub-optimal?
In my case the problem shows up with Firefox, which apparently
renders images to the screen while downloading them, then grabs the
pixels back with XGetImage() and re-renders using those grabbed
pixels. Seems utterly wasteful to me; it is possibly an issue with
the old cairo embedded in Firefox.
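Roughly this pattern, in other words (illustrative only, not
Firefox's actual code):

    #include <X11/Xlib.h>
    #include <X11/Xutil.h>

    /* Render, pull every pixel back over the socket, then push it
       all out again. */
    void
    grab_and_rerender(Display *dpy, Drawable d, GC gc,
                      unsigned int w, unsigned int h)
    {
        XImage *img = XGetImage(dpy, d, 0, 0, w, h, AllPlanes, ZPixmap);
        if (img != NULL) {
            XPutImage(dpy, d, gc, img, 0, 0, 0, 0, w, h);
            XDestroyImage(img);
        }
    }

Every XGetImage() there is a synchronous round trip that drags
w * h * 4 bytes (at 32bpp) through the server's output buffer, i.e.
exactly the path discussed above.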
Index: io.c
===================================================================
RCS file: /cvs/xenocara/xserver/os/io.c,v
retrieving revision 1.11
diff -u -p -r1.11 io.c
--- io.c 24 Aug 2013 19:44:51 -0000 1.11
+++ io.c 29 Mar 2014 18:24:23 -0000
@@ -96,9 +96,11 @@ typedef struct _connectionOutput {
struct _connectionOutput *next;
unsigned char *buf;
int size;
+ int start;
int count;
} ConnectionOutput;
+static void AppendToOutputBuffer(ConnectionOutputPtr, const char *, int);
static ConnectionInputPtr AllocateInputBuffer(void);
static ConnectionOutputPtr AllocateOutputBuffer(void);
@@ -705,6 +707,7 @@ WriteToClient(ClientPtr who, int count,
{
OsCommPtr oc;
ConnectionOutputPtr oco;
+ static char padBuffer[3];
int padBytes;
const char *buf = __buf;
@@ -829,12 +832,9 @@ WriteToClient(ClientPtr who, int count,
NewOutputPending = TRUE;
FD_SET(oc->fd, &OutputPending);
- memmove((char *) oco->buf + oco->count, buf, count);
- oco->count += count;
- if (padBytes) {
- memset(oco->buf + oco->count, '\0', padBytes);
- oco->count += padBytes;
- }
+ AppendToOutputBuffer(oco, buf, count);
+ if (padBytes)
+ AppendToOutputBuffer(oco, padBuffer, padBytes);
return count;
}
@@ -854,7 +854,7 @@ FlushClient(ClientPtr who, OsCommPtr oc,
ConnectionOutputPtr oco = oc->output;
int connection = oc->fd;
XtransConnInfo trans_conn = oc->trans_conn;
- struct iovec iov[3];
+ struct iovec iov[4];
static char padBuffer[3];
const char *extraBuf = __extraBuf;
long written;
@@ -904,11 +904,17 @@ FlushClient(ClientPtr who, OsCommPtr oc,
before = 0; \
}
- InsertIOV((char *) oco->buf, oco->count)
- InsertIOV((char *) extraBuf, extraCount)
- InsertIOV(padBuffer, padsize)
+ if (oco->start + oco->count > oco->size) {
+ InsertIOV((char *) oco->buf + oco->start, oco->size - oco->start)
+ InsertIOV((char *) oco->buf, oco->count - (oco->size - oco->start))
+ }
+ else {
+ InsertIOV((char *) oco->buf + oco->start, oco->count)
+ }
+ InsertIOV((char *) extraBuf, extraCount)
+ InsertIOV(padBuffer, padsize)
- errno = 0;
+ errno = 0;
if (trans_conn && (len = _XSERVTransWritev(trans_conn, iov, i)) >= 0) {
written += len;
notWritten -= len;
@@ -930,41 +936,56 @@ FlushClient(ClientPtr who, OsCommPtr oc,
if (written < oco->count) {
if (written > 0) {
+ oco->start += written;
+ oco->start %= oco->size;
oco->count -= written;
- memmove((char *) oco->buf,
- (char *) oco->buf + written, oco->count);
written = 0;
}
}
else {
written -= oco->count;
+ oco->start = 0;
oco->count = 0;
}
if (notWritten > oco->size) {
unsigned char *obuf;
- obuf = (unsigned char *) realloc(oco->buf,
- notWritten + BUFSIZE);
+ obuf = malloc(notWritten + BUFSIZE);
if (!obuf) {
_XSERVTransDisconnect(oc->trans_conn);
_XSERVTransClose(oc->trans_conn);
oc->trans_conn = NULL;
MarkClientException(who);
+ oco->start = 0;
oco->count = 0;
return -1;
}
+ if (oco->start + oco->count > oco->size) {
+ memcpy(obuf, (char *)oco->buf + oco->start, oco->size - oco->start);
+ memcpy((char *) obuf + (oco->size - oco->start), oco->buf, oco->count - (oco->size - oco->start));
+ oco->start = 0;
+ } else {
+ memcpy((char *) obuf, (char *) oco->buf + oco->start, oco->count);
+ oco->start = 0;
+ }
oco->size = notWritten + BUFSIZE;
+ free(oco->buf);
oco->buf = obuf;
}
/* If the amount written extended into the padBuffer, then the
difference "extraCount - written" may be less than 0 */
- if ((len = extraCount - written) > 0)
- memmove((char *) oco->buf + oco->count,
- extraBuf + written, len);
+ if ((len = extraCount - written) > 0) {
+ AppendToOutputBuffer(oco, extraBuf + written, len);
+ if (padsize)
+ AppendToOutputBuffer(oco, padBuffer, padsize);
+ }
+ else if ((len = extraCount + padsize - written) > 0) {
+ /* 'written' ended inside the pad; keep the leftover pad bytes */
+ AppendToOutputBuffer(oco, padBuffer, len);
+ }
- oco->count = notWritten; /* this will include the pad */
/* return only the amount explicitly requested */
return extraCount;
}
@@ -980,12 +1001,14 @@ FlushClient(ClientPtr who, OsCommPtr oc,
oc->trans_conn = NULL;
}
MarkClientException(who);
+ oco->start = 0;
oco->count = 0;
return -1;
}
}
/* everything was flushed out */
+ oco->start = 0;
oco->count = 0;
/* check to see if this client was write blocked */
if (AnyClientsWriteBlocked) {
@@ -1005,6 +1028,23 @@ FlushClient(ClientPtr who, OsCommPtr oc,
return extraCount; /* return only the amount explicitly requested */
}
+static void
+AppendToOutputBuffer(ConnectionOutputPtr oco, const char *buf, int count)
+{
+ int start, todo;
+
+ start = (oco->start + oco->count) % oco->size;
+ todo = count;
+ if (start + todo > oco->size) {
+ memmove((char *) oco->buf + start, buf, oco->size - start);
+ todo -= (oco->size - start);
+ buf += (oco->size - start);
+ start = 0;
+ }
+ memmove((char *) oco->buf + start, buf, todo);
+ oco->count += count;
+}
+
static ConnectionInputPtr
AllocateInputBuffer(void)
{
@@ -1040,6 +1080,7 @@ AllocateOutputBuffer(void)
return NULL;
}
oco->size = BUFSIZE;
+ oco->start = 0;
oco->count = 0;
return oco;
}
@@ -1074,6 +1115,7 @@ FreeOsBuffers(OsCommPtr oc)
else {
FreeOutputs = oco;
oco->next = (ConnectionOutputPtr) NULL;
+ oco->start = 0;
oco->count = 0;
}
}
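To illustrate the wrap-around case in AppendToOutputBuffer(): with
size = 8, start = 6 and count = 0, appending 4 bytes copies 2 bytes
to offsets 6-7, wraps, and copies the remaining 2 bytes to offsets
0-1, leaving start = 6 and count = 4. FlushClient() then has to emit
the buffer as two iovec entries, which is why iov[] grows from 3 to 4
entries in the diff above.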
Index: Xtranssock.c
===================================================================
RCS file: /cvs/xenocara/lib/libxtrans/Xtranssock.c,v
retrieving revision 1.7
diff -u -p -r1.7 Xtranssock.c
--- Xtranssock.c 8 Apr 2012 14:50:52 -0000 1.7
+++ Xtranssock.c 28 Mar 2014 19:07:31 -0000
@@ -445,6 +445,20 @@ TRANS(SocketOpen) (int i, int type)
}
#endif
+ if (Sockettrans2devtab[i].family == AF_UNIX)
+ {
+ SOCKLEN_T len = sizeof (int);
+ int val;
+
+ if (getsockopt (ciptr->fd, SOL_SOCKET, SO_SNDBUF,
+ (char *) &val, &len) == 0 && val < 64 * 1024)
+ {
+ val = 64 * 1024;
+ setsockopt (ciptr->fd, SOL_SOCKET, SO_SNDBUF,
+ (char *) &val, sizeof (int));
+ }
+ }
+
return ciptr;
}
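One caveat with the setsockopt() above: the kernel clamps SO_SNDBUF
to its own limit (SB_MAX on the BSDs, net.core.wmem_max on Linux), so
the call can fail or be silently reduced if 64k is above that limit;
reading the value back with getsockopt() afterwards would confirm
what was actually granted.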