[PATCH] glx: Cache indirect opcode->index conversion

Chris Wilson chris at chris-wilson.co.uk
Wed Jan 26 06:47:17 PST 2011


Decoding the opcode into the appropriate index into the dispatch tables
is quite expensive using the radix tree. By keeping a small cache, we
can dramatically speed up indirect function dispatch.

World of Padman over the network increased from 28fps to 45fps, with an
almost identical increase when run indirectly over a local socket --
which serves as a nice reminder not to use indirect rendering at all.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 glx/indirect_table.c |    9 ++++++---
 glx/indirect_table.h |   13 +++++++++----
 glx/indirect_util.c  |   24 ++++++++++++++++++++----
 glx/indirect_util.h  |    4 ++--
 4 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/glx/indirect_table.c b/glx/indirect_table.c
index 01d1da3..747189a 100644
--- a/glx/indirect_table.c
+++ b/glx/indirect_table.c
@@ -188,9 +188,10 @@ static const void *Single_function_table[112][2] = {
     /* [ 111] =   167 */ {NULL, NULL},
 };
 
-const struct __glXDispatchInfo Single_dispatch_info = {
+struct __glXDispatchInfo Single_dispatch_info = {
     8,
     Single_dispatch_tree,
+    { { -1 } },
     Single_function_table,
     NULL,
     NULL
@@ -1221,9 +1222,10 @@ static const gl_proto_size_func Render_size_func_table[74] = {
    __glXDeleteFramebuffersEXTReqSize,
 };
 
-const struct __glXDispatchInfo Render_dispatch_info = {
+struct __glXDispatchInfo Render_dispatch_info = {
     13,
     Render_dispatch_tree,
+    { { -1 } },
     Render_function_table,
     Render_size_table,
     Render_size_func_table
@@ -1585,9 +1587,10 @@ static const void *VendorPriv_function_table[104][2] = {
     /* [ 103] = 65551 */ {NULL, NULL},
 };
 
-const struct __glXDispatchInfo VendorPriv_dispatch_info = {
+struct __glXDispatchInfo VendorPriv_dispatch_info = {
     17,
     VendorPriv_dispatch_tree,
+    { { -1 } },
     VendorPriv_function_table,
     NULL,
     NULL
diff --git a/glx/indirect_table.h b/glx/indirect_table.h
index 4af1ccb..97dae3b 100644
--- a/glx/indirect_table.h
+++ b/glx/indirect_table.h
@@ -46,7 +46,12 @@ struct __glXDispatchInfo {
     /**
      */
     const int_fast16_t * dispatch_tree;
-    
+#define DISPATCH_CACHE_SIZE 32
+    struct dispatch_cache {	/* one slot of the opcode -> index cache */
+	    unsigned opcode;	/* full-width tag: some opcodes exceed 16 bits */
+	    int16_t index;	/* index returned by the dispatch_tree lookup */
+    } dispatch_cache[DISPATCH_CACHE_SIZE];
+
     /**
      * Array of protocol decode and dispatch functions index by the opcode
      * search tree (i.e., \c dispatch_tree).  The first element in each pair
@@ -99,8 +104,8 @@ struct __glXDispatchInfo {
  */
 #define IS_LEAF_INDEX(x)   ((x) <= 0)
 
-extern const struct __glXDispatchInfo Single_dispatch_info;
-extern const struct __glXDispatchInfo Render_dispatch_info;
-extern const struct __glXDispatchInfo VendorPriv_dispatch_info;
+extern struct __glXDispatchInfo Single_dispatch_info;
+extern struct __glXDispatchInfo Render_dispatch_info;
+extern struct __glXDispatchInfo VendorPriv_dispatch_info;
 
 #endif /* INDIRECT_TABLE_H */
diff --git a/glx/indirect_util.c b/glx/indirect_util.c
index 46a2706..114d561 100644
--- a/glx/indirect_util.c
+++ b/glx/indirect_util.c
@@ -200,8 +200,8 @@ __glXSendReplySwap( ClientPtr client, const void * data, size_t elements,
 
 
 static int
-get_decode_index(const struct __glXDispatchInfo *dispatch_info,
-		 unsigned opcode)
+__get_decode_index(struct __glXDispatchInfo *dispatch_info,
+		   unsigned opcode)
 {
     int remaining_bits;
     int next_remain;
@@ -273,8 +273,24 @@ get_decode_index(const struct __glXDispatchInfo *dispatch_info,
 }
 
 
+static inline int
+get_decode_index(struct __glXDispatchInfo *dispatch_info,
+		 unsigned opcode)
+{   /* Cached wrapper around __get_decode_index(); slot = low opcode bits. */
+    struct dispatch_cache *cache =
+	    &dispatch_info->dispatch_cache[opcode & (DISPATCH_CACHE_SIZE-1)];
+    /* Tag mismatch: do the full lookup and overwrite whatever the slot held. */
+    if (cache->opcode != opcode) {
+	cache->opcode = opcode;
+	cache->index = __get_decode_index(dispatch_info, opcode);
+    }
+    /* NOTE(review): assumes the cache fields store opcode/index unchanged. */
+    return cache->index;
+}
+
+
 void *
-__glXGetProtocolDecodeFunction(const struct __glXDispatchInfo *dispatch_info,
+__glXGetProtocolDecodeFunction(struct __glXDispatchInfo *dispatch_info,
 			       int opcode, int swapped_version)
 {
     const int func_index = get_decode_index(dispatch_info, opcode);
@@ -286,7 +302,7 @@ __glXGetProtocolDecodeFunction(const struct __glXDispatchInfo *dispatch_info,
 
 
 int
-__glXGetProtocolSizeData(const struct __glXDispatchInfo *dispatch_info,
+__glXGetProtocolSizeData(struct __glXDispatchInfo *dispatch_info,
 			 int opcode, __GLXrenderSizeData *data)
 {
     if (dispatch_info->size_table != NULL) {
diff --git a/glx/indirect_util.h b/glx/indirect_util.h
index b00727a..a1bcb22 100644
--- a/glx/indirect_util.h
+++ b/glx/indirect_util.h
@@ -43,11 +43,11 @@ extern void __glXSendReplySwap( ClientPtr client, const void * data,
 struct __glXDispatchInfo;
 
 extern void *__glXGetProtocolDecodeFunction(
-    const struct __glXDispatchInfo *dispatch_info, int opcode,
+    struct __glXDispatchInfo *dispatch_info, int opcode,
     int swapped_version);
 
 extern int __glXGetProtocolSizeData(
-    const struct __glXDispatchInfo *dispatch_info, int opcode,
+    struct __glXDispatchInfo *dispatch_info, int opcode,
     __GLXrenderSizeData *data);
 
 #endif /* __GLX_INDIRECT_UTIL_H__ */
-- 
1.7.2.3



More information about the xorg-devel mailing list