[PATCH] glx: Cache indirect opcode->index conversion
Chris Wilson
chris at chris-wilson.co.uk
Wed Jan 26 06:47:17 PST 2011
Decoding an opcode into the appropriate index for the dispatch tables
is quite expensive using the radix tree. By keeping a small direct-mapped
cache (32 entries per dispatch table) of recent opcode->index results, we
can dramatically speed up indirect function dispatch.
World of Padman over the network increased from 28fps to 45fps, with an
almost identical increase when run indirectly over a local socket.
Which serves as a nice reminder not to use indirect rendering in the first place.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
glx/indirect_table.c | 9 ++++++---
glx/indirect_table.h | 13 +++++++++----
glx/indirect_util.c | 24 ++++++++++++++++++++----
glx/indirect_util.h | 4 ++--
4 files changed, 37 insertions(+), 13 deletions(-)
diff --git a/glx/indirect_table.c b/glx/indirect_table.c
index 01d1da3..747189a 100644
--- a/glx/indirect_table.c
+++ b/glx/indirect_table.c
@@ -188,9 +188,10 @@ static const void *Single_function_table[112][2] = {
/* [ 111] = 167 */ {NULL, NULL},
};
-const struct __glXDispatchInfo Single_dispatch_info = {
+struct __glXDispatchInfo Single_dispatch_info = {
8,
Single_dispatch_tree,
+ { { -1 } },
Single_function_table,
NULL,
NULL
@@ -1221,9 +1222,10 @@ static const gl_proto_size_func Render_size_func_table[74] = {
__glXDeleteFramebuffersEXTReqSize,
};
-const struct __glXDispatchInfo Render_dispatch_info = {
+struct __glXDispatchInfo Render_dispatch_info = {
13,
Render_dispatch_tree,
+ { { -1 } },
Render_function_table,
Render_size_table,
Render_size_func_table
@@ -1585,9 +1587,10 @@ static const void *VendorPriv_function_table[104][2] = {
/* [ 103] = 65551 */ {NULL, NULL},
};
-const struct __glXDispatchInfo VendorPriv_dispatch_info = {
+struct __glXDispatchInfo VendorPriv_dispatch_info = {
17,
VendorPriv_dispatch_tree,
+ { { -1 } },
VendorPriv_function_table,
NULL,
NULL
diff --git a/glx/indirect_table.h b/glx/indirect_table.h
index 4af1ccb..97dae3b 100644
--- a/glx/indirect_table.h
+++ b/glx/indirect_table.h
@@ -46,7 +46,12 @@ struct __glXDispatchInfo {
/**
*/
const int_fast16_t * dispatch_tree;
-
+#define DISPATCH_CACHE_SIZE 32
+ struct dispatch_cache {
+ uint16_t opcode;
+ int16_t index;
+ } dispatch_cache[DISPATCH_CACHE_SIZE];
+
/**
* Array of protocol decode and dispatch functions index by the opcode
* search tree (i.e., \c dispatch_tree). The first element in each pair
@@ -99,8 +104,8 @@ struct __glXDispatchInfo {
*/
#define IS_LEAF_INDEX(x) ((x) <= 0)
-extern const struct __glXDispatchInfo Single_dispatch_info;
-extern const struct __glXDispatchInfo Render_dispatch_info;
-extern const struct __glXDispatchInfo VendorPriv_dispatch_info;
+extern struct __glXDispatchInfo Single_dispatch_info;
+extern struct __glXDispatchInfo Render_dispatch_info;
+extern struct __glXDispatchInfo VendorPriv_dispatch_info;
#endif /* INDIRECT_TABLE_H */
diff --git a/glx/indirect_util.c b/glx/indirect_util.c
index 46a2706..114d561 100644
--- a/glx/indirect_util.c
+++ b/glx/indirect_util.c
@@ -200,8 +200,8 @@ __glXSendReplySwap( ClientPtr client, const void * data, size_t elements,
static int
-get_decode_index(const struct __glXDispatchInfo *dispatch_info,
- unsigned opcode)
+__get_decode_index(struct __glXDispatchInfo *dispatch_info,
+ unsigned opcode)
{
int remaining_bits;
int next_remain;
@@ -273,8 +273,24 @@ get_decode_index(const struct __glXDispatchInfo *dispatch_info,
}
+/**
+ * Map \c opcode to its dispatch-table index, consulting the per-table
+ * direct-mapped cache before falling back to the uncached lookup in
+ * \c __get_decode_index.
+ *
+ * The cache tag (\c dispatch_cache::opcode) is only 16 bits wide, while
+ * \c opcode is a full \c unsigned and the vendor-private table contains
+ * opcodes above 65535 (e.g. 65551).  Storing such an opcode would truncate
+ * the tag, so that a later lookup of the smaller opcode sharing the same
+ * slot and truncated value (e.g. 0 after 65536) would hit the cache and
+ * receive the wrong index.  Opcodes that do not fit the tag therefore
+ * bypass the cache entirely.
+ *
+ * \param dispatch_info  Dispatch table whose cache is consulted and updated.
+ * \param opcode         Protocol opcode to decode.
+ * \return The decode index for \c opcode; on the cached path this is the
+ *         value held in the entry's \c int16_t \c index field.
+ */
+static inline int
+get_decode_index(struct __glXDispatchInfo *dispatch_info,
+                 unsigned opcode)
+{
+    struct dispatch_cache *cache;
+
+    /* Too wide for the uint16_t tag: never cache it, so it can never alias. */
+    if (opcode != (uint16_t) opcode)
+        return __get_decode_index(dispatch_info, opcode);
+
+    cache = &dispatch_info->dispatch_cache[opcode & (DISPATCH_CACHE_SIZE-1)];
+    if (cache->opcode != opcode) {
+        /* Miss: resolve through the slow path and remember the result. */
+        cache->index = __get_decode_index(dispatch_info, opcode);
+        cache->opcode = opcode;
+    }
+
+    return cache->index;
+}
+
+
void *
-__glXGetProtocolDecodeFunction(const struct __glXDispatchInfo *dispatch_info,
+__glXGetProtocolDecodeFunction(struct __glXDispatchInfo *dispatch_info,
int opcode, int swapped_version)
{
const int func_index = get_decode_index(dispatch_info, opcode);
@@ -286,7 +302,7 @@ __glXGetProtocolDecodeFunction(const struct __glXDispatchInfo *dispatch_info,
int
-__glXGetProtocolSizeData(const struct __glXDispatchInfo *dispatch_info,
+__glXGetProtocolSizeData(struct __glXDispatchInfo *dispatch_info,
int opcode, __GLXrenderSizeData *data)
{
if (dispatch_info->size_table != NULL) {
diff --git a/glx/indirect_util.h b/glx/indirect_util.h
index b00727a..a1bcb22 100644
--- a/glx/indirect_util.h
+++ b/glx/indirect_util.h
@@ -43,11 +43,11 @@ extern void __glXSendReplySwap( ClientPtr client, const void * data,
struct __glXDispatchInfo;
extern void *__glXGetProtocolDecodeFunction(
- const struct __glXDispatchInfo *dispatch_info, int opcode,
+ struct __glXDispatchInfo *dispatch_info, int opcode,
int swapped_version);
extern int __glXGetProtocolSizeData(
- const struct __glXDispatchInfo *dispatch_info, int opcode,
+ struct __glXDispatchInfo *dispatch_info, int opcode,
__GLXrenderSizeData *data);
#endif /* __GLX_INDIRECT_UTIL_H__ */
--
1.7.2.3
More information about the xorg-devel
mailing list