From: havoc Date: Thu, 28 Feb 2013 09:30:03 +0000 (+0000) Subject: implemented use of GL_ARB_uniform_buffer_object for r_glsl_skeletal X-Git-Tag: xonotic-v0.8.0~96^2~124 X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=a0214de923ad8e4f9583306f7520c1dcbe31dc3c;p=xonotic%2Fdarkplaces.git implemented use of GL_ARB_uniform_buffer_object for r_glsl_skeletal added R_BufferData_ system which allows arbitrary vertex/index/uniform buffers to be dynamically built during a frame (in the same way as the R_FrameData_ system), this has vastly better performance than uploading them individually and code should be migrated to use it as appropriate vid.support.glshaderversion added (typically 140) git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@11915 d7cf8633-e32d-0410-b094-e92efae38249 --- diff --git a/cl_screen.c b/cl_screen.c index 8458d2cf..249ee222 100644 --- a/cl_screen.c +++ b/cl_screen.c @@ -785,6 +785,14 @@ const char *r_stat_name[r_stat_count] = "vertexbufferuploadsize", "framedatacurrent", "framedatasize", + "bufferdatacurrent_vertex", // R_BUFFERDATA_ types are added to this index + "bufferdatacurrent_index16", + "bufferdatacurrent_index32", + "bufferdatacurrent_uniform", + "bufferdatasize_vertex", // R_BUFFERDATA_ types are added to this index + "bufferdatasize_index16", + "bufferdatasize_index32", + "bufferdatasize_uniform", "animcache_vertexmesh_count", "animcache_vertexmesh_vertices", "animcache_vertexmesh_maxvertices", diff --git a/client.h b/client.h index 8380f7b0..29de924a 100644 --- a/client.h +++ b/client.h @@ -70,6 +70,14 @@ typedef enum r_stat_e r_stat_vertexbufferuploadsize, r_stat_framedatacurrent, r_stat_framedatasize, + r_stat_bufferdatacurrent_vertex, // R_BUFFERDATA_ types are added to this index + r_stat_bufferdatacurrent_index16, + r_stat_bufferdatacurrent_index32, + r_stat_bufferdatacurrent_uniform, + r_stat_bufferdatasize_vertex, // R_BUFFERDATA_ types are added to this index + r_stat_bufferdatasize_index16, + 
r_stat_bufferdatasize_index32, + r_stat_bufferdatasize_uniform, r_stat_animcache_vertexmesh_count, r_stat_animcache_vertexmesh_vertices, r_stat_animcache_vertexmesh_maxvertices, @@ -539,9 +547,14 @@ typedef struct entity_render_s r_meshbuffer_t *animcache_vertex3fbuffer; r_vertexmesh_t *animcache_vertexmesh; r_meshbuffer_t *animcache_vertexmeshbuffer; - // gpu-skinning shader needs transforms in a certain format + // gpu-skinning shader needs transforms in a certain format, we have to + // upload this to a uniform buffer for the shader to use, and also keep a + // backup copy in system memory for the dynamic batch fallback code // if this is not NULL, the other animcache variables are NULL float *animcache_skeletaltransform3x4; + r_meshbuffer_t *animcache_skeletaltransform3x4buffer; + int animcache_skeletaltransform3x4offset; + int animcache_skeletaltransform3x4size; // current lighting from map (updated ONLY by client code, not renderer) vec3_t modellight_ambient; diff --git a/gl_backend.c b/gl_backend.c index f68d27b8..981c9ec2 100644 --- a/gl_backend.c +++ b/gl_backend.c @@ -235,6 +235,7 @@ typedef struct gl_state_s int lockrange_count; int vertexbufferobject; int elementbufferobject; + int uniformbufferobject; int framebufferobject; int defaultframebufferobject; // deal with platforms that use a non-zero default fbo qboolean pointer_color_enabled; @@ -1247,6 +1248,16 @@ static void GL_BindEBO(int bufferobject) } } +static void GL_BindUBO(int bufferobject) +{ + if (gl_state.uniformbufferobject != bufferobject) + { + gl_state.uniformbufferobject = bufferobject; + CHECKGLERROR + qglBindBufferARB(GL_UNIFORM_BUFFER, bufferobject);CHECKGLERROR + } +} + static const GLuint drawbuffers[4] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1, GL_COLOR_ATTACHMENT2, GL_COLOR_ATTACHMENT3}; int R_Mesh_CreateFramebufferObject(rtexture_t *depthtexture, rtexture_t *colortexture, rtexture_t *colortexture2, rtexture_t *colortexture3, rtexture_t *colortexture4) { @@ -2782,9 +2793,9 @@ 
void R_Mesh_Draw(int firstvertex, int numvertices, int firsttriangle, int numtri if (!element3s_indexbuffer && gl_state.usevbo_dynamicindex) { if (gl_state.draw_dynamicindexbuffer) - R_Mesh_UpdateMeshBuffer(gl_state.draw_dynamicindexbuffer, (void *)element3s, numelements * sizeof(*element3s)); + R_Mesh_UpdateMeshBuffer(gl_state.draw_dynamicindexbuffer, (void *)element3s, numelements * sizeof(*element3s), false, 0); else - gl_state.draw_dynamicindexbuffer = R_Mesh_CreateMeshBuffer((void *)element3s, numelements * sizeof(*element3s), "temporary", true, true, true); + gl_state.draw_dynamicindexbuffer = R_Mesh_CreateMeshBuffer((void *)element3s, numelements * sizeof(*element3s), "temporary", true, false, true, true); element3s_indexbuffer = gl_state.draw_dynamicindexbuffer; element3s_bufferoffset = 0; } @@ -2794,9 +2805,9 @@ void R_Mesh_Draw(int firstvertex, int numvertices, int firsttriangle, int numtri if (!element3i_indexbuffer && gl_state.usevbo_dynamicindex) { if (gl_state.draw_dynamicindexbuffer) - R_Mesh_UpdateMeshBuffer(gl_state.draw_dynamicindexbuffer, (void *)element3i, numelements * sizeof(*element3i)); + R_Mesh_UpdateMeshBuffer(gl_state.draw_dynamicindexbuffer, (void *)element3i, numelements * sizeof(*element3i), false, 0); else - gl_state.draw_dynamicindexbuffer = R_Mesh_CreateMeshBuffer((void *)element3i, numelements * sizeof(*element3i), "temporary", true, true, false); + gl_state.draw_dynamicindexbuffer = R_Mesh_CreateMeshBuffer((void *)element3i, numelements * sizeof(*element3i), "temporary", true, false, true, false); element3i_indexbuffer = gl_state.draw_dynamicindexbuffer; element3i_bufferoffset = 0; } @@ -3276,25 +3287,36 @@ void R_Mesh_Finish(void) R_Mesh_SetRenderTargets(0, NULL, NULL, NULL, NULL, NULL); } -r_meshbuffer_t *R_Mesh_CreateMeshBuffer(const void *data, size_t size, const char *name, qboolean isindexbuffer, qboolean isdynamic, qboolean isindex16) +r_meshbuffer_t *R_Mesh_CreateMeshBuffer(const void *data, size_t size, const char *name, 
qboolean isindexbuffer, qboolean isuniformbuffer, qboolean isdynamic, qboolean isindex16) { r_meshbuffer_t *buffer; - if (!(isdynamic ? (isindexbuffer ? gl_state.usevbo_dynamicindex : gl_state.usevbo_dynamicvertex) : (isindexbuffer ? gl_state.usevbo_staticindex : gl_state.usevbo_staticvertex))) - return NULL; + if (isuniformbuffer) + { + if (!vid.support.arb_uniform_buffer_object) + return NULL; + } + else + { + if (!vid.support.arb_vertex_buffer_object) + return NULL; + if (!isdynamic && !(isindexbuffer ? gl_state.usevbo_staticindex : gl_state.usevbo_staticvertex)) + return NULL; + } buffer = (r_meshbuffer_t *)Mem_ExpandableArray_AllocRecord(&gl_state.meshbufferarray); memset(buffer, 0, sizeof(*buffer)); buffer->bufferobject = 0; buffer->devicebuffer = NULL; buffer->size = 0; buffer->isindexbuffer = isindexbuffer; + buffer->isuniformbuffer = isuniformbuffer; buffer->isdynamic = isdynamic; buffer->isindex16 = isindex16; strlcpy(buffer->name, name, sizeof(buffer->name)); - R_Mesh_UpdateMeshBuffer(buffer, data, size); + R_Mesh_UpdateMeshBuffer(buffer, data, size, false, 0); return buffer; } -void R_Mesh_UpdateMeshBuffer(r_meshbuffer_t *buffer, const void *data, size_t size) +void R_Mesh_UpdateMeshBuffer(r_meshbuffer_t *buffer, const void *data, size_t size, qboolean subdata, size_t offset) { if (!buffer) return; @@ -3317,11 +3339,18 @@ void R_Mesh_UpdateMeshBuffer(r_meshbuffer_t *buffer, const void *data, size_t si case RENDERPATH_GLES2: if (!buffer->bufferobject) qglGenBuffersARB(1, (GLuint *)&buffer->bufferobject); - if (buffer->isindexbuffer) + if (buffer->isuniformbuffer) + GL_BindUBO(buffer->bufferobject); + else if (buffer->isindexbuffer) GL_BindEBO(buffer->bufferobject); else GL_BindVBO(buffer->bufferobject); - qglBufferDataARB(buffer->isindexbuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER, size, data, buffer->isdynamic ? GL_STREAM_DRAW : GL_STATIC_DRAW); + if (subdata) + qglBufferSubDataARB(buffer->isuniformbuffer ? 
GL_UNIFORM_BUFFER : (buffer->isindexbuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER), offset, size, data); + else + qglBufferDataARB(buffer->isuniformbuffer ? GL_UNIFORM_BUFFER : (buffer->isindexbuffer ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER), size, data, buffer->isdynamic ? GL_STREAM_DRAW : GL_STATIC_DRAW); + if (buffer->isuniformbuffer) + GL_BindUBO(0); break; case RENDERPATH_D3D9: #ifdef SUPPORTD3D @@ -3331,17 +3360,17 @@ void R_Mesh_UpdateMeshBuffer(r_meshbuffer_t *buffer, const void *data, size_t si if (buffer->isindexbuffer) { IDirect3DIndexBuffer9 *d3d9indexbuffer = (IDirect3DIndexBuffer9 *)buffer->devicebuffer; - if (size > buffer->size || !buffer->devicebuffer) + if (offset+size > buffer->size || !buffer->devicebuffer) { if (buffer->devicebuffer) IDirect3DIndexBuffer9_Release((IDirect3DIndexBuffer9*)buffer->devicebuffer); buffer->devicebuffer = NULL; - if (FAILED(result = IDirect3DDevice9_CreateIndexBuffer(vid_d3d9dev, size, buffer->isdynamic ? D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC : 0, buffer->isindex16 ? D3DFMT_INDEX16 : D3DFMT_INDEX32, buffer->isdynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED, &d3d9indexbuffer, NULL))) + if (FAILED(result = IDirect3DDevice9_CreateIndexBuffer(vid_d3d9dev, offset+size, buffer->isdynamic ? D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC : 0, buffer->isindex16 ? D3DFMT_INDEX16 : D3DFMT_INDEX32, buffer->isdynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED, &d3d9indexbuffer, NULL))) Sys_Error("IDirect3DDevice9_CreateIndexBuffer(%p, %d, %x, %x, %x, %p, NULL) returned %x\n", vid_d3d9dev, (int)size, buffer->isdynamic ? (int)D3DUSAGE_DYNAMIC : 0, buffer->isindex16 ? (int)D3DFMT_INDEX16 : (int)D3DFMT_INDEX32, buffer->isdynamic ? (int)D3DPOOL_DEFAULT : (int)D3DPOOL_MANAGED, &d3d9indexbuffer, (int)result); buffer->devicebuffer = (void *)d3d9indexbuffer; - buffer->size = size; + buffer->size = offset+size; } - if (!FAILED(IDirect3DIndexBuffer9_Lock(d3d9indexbuffer, 0, 0, &datapointer, buffer->isdynamic ? 
D3DLOCK_DISCARD : 0))) + if (!FAILED(IDirect3DIndexBuffer9_Lock(d3d9indexbuffer, (unsigned int)offset, (unsigned int)size, &datapointer, buffer->isdynamic ? D3DLOCK_DISCARD : 0))) { if (data) memcpy(datapointer, data, size); @@ -3353,17 +3382,17 @@ void R_Mesh_UpdateMeshBuffer(r_meshbuffer_t *buffer, const void *data, size_t si else { IDirect3DVertexBuffer9 *d3d9vertexbuffer = (IDirect3DVertexBuffer9 *)buffer->devicebuffer; - if (size > buffer->size || !buffer->devicebuffer) + if (offset+size > buffer->size || !buffer->devicebuffer) { if (buffer->devicebuffer) IDirect3DVertexBuffer9_Release((IDirect3DVertexBuffer9*)buffer->devicebuffer); buffer->devicebuffer = NULL; - if (FAILED(result = IDirect3DDevice9_CreateVertexBuffer(vid_d3d9dev, size, buffer->isdynamic ? D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC : 0, 0, buffer->isdynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED, &d3d9vertexbuffer, NULL))) + if (FAILED(result = IDirect3DDevice9_CreateVertexBuffer(vid_d3d9dev, offset+size, buffer->isdynamic ? D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC : 0, 0, buffer->isdynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED, &d3d9vertexbuffer, NULL))) Sys_Error("IDirect3DDevice9_CreateVertexBuffer(%p, %d, %x, %x, %x, %p, NULL) returned %x\n", vid_d3d9dev, (int)size, buffer->isdynamic ? (int)D3DUSAGE_DYNAMIC : 0, 0, buffer->isdynamic ? (int)D3DPOOL_DEFAULT : (int)D3DPOOL_MANAGED, &d3d9vertexbuffer, (int)result); buffer->devicebuffer = (void *)d3d9vertexbuffer; - buffer->size = size; + buffer->size = offset+size; } - if (!FAILED(IDirect3DVertexBuffer9_Lock(d3d9vertexbuffer, 0, 0, &datapointer, buffer->isdynamic ? D3DLOCK_DISCARD : 0))) + if (!FAILED(IDirect3DVertexBuffer9_Lock(d3d9vertexbuffer, (unsigned int)offset, (unsigned int)size, &datapointer, buffer->isdynamic ? 
D3DLOCK_DISCARD : 0))) { if (data) memcpy(datapointer, data, size); @@ -4204,9 +4233,9 @@ void R_Mesh_PrepareVertices_Vertex3f(int numvertices, const float *vertex3f, con if (!vertexbuffer && gl_state.usevbo_dynamicvertex) { if (gl_state.preparevertices_dynamicvertexbuffer) - R_Mesh_UpdateMeshBuffer(gl_state.preparevertices_dynamicvertexbuffer, vertex3f, numvertices * sizeof(float[3])); + R_Mesh_UpdateMeshBuffer(gl_state.preparevertices_dynamicvertexbuffer, vertex3f, numvertices * sizeof(float[3]), false, 0); else - gl_state.preparevertices_dynamicvertexbuffer = R_Mesh_CreateMeshBuffer(vertex3f, numvertices * sizeof(float[3]), "temporary", false, true, false); + gl_state.preparevertices_dynamicvertexbuffer = R_Mesh_CreateMeshBuffer(vertex3f, numvertices * sizeof(float[3]), "temporary", false, false, true, false); vertexbuffer = gl_state.preparevertices_dynamicvertexbuffer; } switch(vid.renderpath) @@ -4407,9 +4436,9 @@ void R_Mesh_PrepareVertices_Generic(int numvertices, const r_vertexgeneric_t *ve if (!vertexbuffer && gl_state.usevbo_dynamicvertex) { if (gl_state.preparevertices_dynamicvertexbuffer) - R_Mesh_UpdateMeshBuffer(gl_state.preparevertices_dynamicvertexbuffer, vertex, numvertices * sizeof(*vertex)); + R_Mesh_UpdateMeshBuffer(gl_state.preparevertices_dynamicvertexbuffer, vertex, numvertices * sizeof(*vertex), false, 0); else - gl_state.preparevertices_dynamicvertexbuffer = R_Mesh_CreateMeshBuffer(vertex, numvertices * sizeof(*vertex), "temporary", false, true, false); + gl_state.preparevertices_dynamicvertexbuffer = R_Mesh_CreateMeshBuffer(vertex, numvertices * sizeof(*vertex), "temporary", false, false, true, false); vertexbuffer = gl_state.preparevertices_dynamicvertexbuffer; } switch(vid.renderpath) @@ -4621,9 +4650,9 @@ void R_Mesh_PrepareVertices_Mesh(int numvertices, const r_vertexmesh_t *vertex, if (!vertexbuffer && gl_state.usevbo_dynamicvertex) { if (gl_state.preparevertices_dynamicvertexbuffer) - 
R_Mesh_UpdateMeshBuffer(gl_state.preparevertices_dynamicvertexbuffer, vertex, numvertices * sizeof(*vertex)); + R_Mesh_UpdateMeshBuffer(gl_state.preparevertices_dynamicvertexbuffer, vertex, numvertices * sizeof(*vertex), false, 0); else - gl_state.preparevertices_dynamicvertexbuffer = R_Mesh_CreateMeshBuffer(vertex, numvertices * sizeof(*vertex), "temporary", false, true, false); + gl_state.preparevertices_dynamicvertexbuffer = R_Mesh_CreateMeshBuffer(vertex, numvertices * sizeof(*vertex), "temporary", false, false, true, false); vertexbuffer = gl_state.preparevertices_dynamicvertexbuffer; } switch(vid.renderpath) diff --git a/gl_backend.h b/gl_backend.h index 0c6913d9..dcfacf08 100644 --- a/gl_backend.h +++ b/gl_backend.h @@ -72,8 +72,8 @@ void R_Mesh_Finish(void); // vertex buffer and index buffer creation/updating/freeing -r_meshbuffer_t *R_Mesh_CreateMeshBuffer(const void *data, size_t size, const char *name, qboolean isindexbuffer, qboolean isdynamic, qboolean isindex16); -void R_Mesh_UpdateMeshBuffer(r_meshbuffer_t *buffer, const void *data, size_t size); +r_meshbuffer_t *R_Mesh_CreateMeshBuffer(const void *data, size_t size, const char *name, qboolean isindexbuffer, qboolean isuniformbuffer, qboolean isdynamic, qboolean isindex16); +void R_Mesh_UpdateMeshBuffer(r_meshbuffer_t *buffer, const void *data, size_t size, qboolean subdata, size_t offset); void R_Mesh_DestroyMeshBuffer(r_meshbuffer_t *buffer); void GL_Mesh_ListVBOs(qboolean printeach); diff --git a/gl_rmain.c b/gl_rmain.c index 96b52e52..bd0f43c5 100644 --- a/gl_rmain.c +++ b/gl_rmain.c @@ -237,6 +237,13 @@ cvar_t r_glsl_saturation_redcompensate = {CVAR_SAVE, "r_glsl_saturation_redcompe cvar_t r_glsl_vertextextureblend_usebothalphas = {CVAR_SAVE, "r_glsl_vertextextureblend_usebothalphas", "0", "use both alpha layers on vertex blended surfaces, each alpha layer sets amount of 'blend leak' on another layer, requires mod_q3shader_force_terrain_alphaflag on."}; cvar_t r_framedatasize = {CVAR_SAVE, 
"r_framedatasize", "0.5", "size of renderer data cache used during one frame (for skeletal animation caching, light processing, etc)"}; +cvar_t r_bufferdatasize[R_BUFFERDATA_COUNT] = +{ + {CVAR_SAVE, "r_bufferdatasize_vertex", "4", "vertex buffer size for one frame"}, + {CVAR_SAVE, "r_bufferdatasize_index16", "1", "index buffer size for one frame (16bit indices)"}, + {CVAR_SAVE, "r_bufferdatasize_index32", "1", "index buffer size for one frame (32bit indices)"}, + {CVAR_SAVE, "r_bufferdatasize_uniform", "0.25", "uniform buffer size for one frame"}, +}; extern cvar_t v_glslgamma; extern cvar_t v_glslgamma_2d; @@ -248,6 +255,8 @@ r_framebufferstate_t r_fb; /// shadow volume bsp struct with automatically growing nodes buffer svbsp_t r_svbsp; +int r_uniformbufferalignment = 32; // dynamically updated to match GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT + rtexture_t *r_texture_blanknormalmap; rtexture_t *r_texture_white; rtexture_t *r_texture_grey128; @@ -844,6 +853,10 @@ typedef struct r_glsl_permutation_s int loc_NormalmapScrollBlend; int loc_BounceGridMatrix; int loc_BounceGridIntensity; + /// uniform block bindings + int ubibind_Skeletal_Transform12_UniformBlock; + /// uniform block indices + int ubiloc_Skeletal_Transform12_UniformBlock; } r_glsl_permutation_t; @@ -1051,6 +1064,7 @@ static char *R_GetShaderText(const char *filename, qboolean printfromdisknotice, static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode, unsigned int permutation) { int i; + int ubibind; int sampler; shadermodeinfo_t *modeinfo = glslshadermodeinfo + mode; char *sourcestring; @@ -1072,8 +1086,18 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode strlcat(permutationname, modeinfo->filename, sizeof(permutationname)); + // we need 140 for r_glsl_skeletal (GL_ARB_uniform_buffer_object) + if(vid.support.glshaderversion >= 140) + { + vertstrings_list[vertstrings_count++] = "#version 140\n"; + geomstrings_list[geomstrings_count++] = "#version 
140\n"; + fragstrings_list[fragstrings_count++] = "#version 140\n"; + vertstrings_list[vertstrings_count++] = "#define GLSL140\n"; + geomstrings_list[geomstrings_count++] = "#define GLSL140\n"; + fragstrings_list[fragstrings_count++] = "#define GLSL140\n"; + } // if we can do #version 130, we should (this improves quality of offset/reliefmapping thanks to textureGrad) - if(vid.support.gl20shaders130) + else if(vid.support.glshaderversion >= 130) { vertstrings_list[vertstrings_count++] = "#version 130\n"; geomstrings_list[geomstrings_count++] = "#version 130\n"; @@ -1206,7 +1230,6 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode p->loc_ShadowMap_Parameters = qglGetUniformLocation(p->program, "ShadowMap_Parameters"); p->loc_ShadowMap_TextureScale = qglGetUniformLocation(p->program, "ShadowMap_TextureScale"); p->loc_SpecularPower = qglGetUniformLocation(p->program, "SpecularPower"); - p->loc_Skeletal_Transform12 = qglGetUniformLocation(p->program, "Skeletal_Transform12"); p->loc_UserVec1 = qglGetUniformLocation(p->program, "UserVec1"); p->loc_UserVec2 = qglGetUniformLocation(p->program, "UserVec2"); p->loc_UserVec3 = qglGetUniformLocation(p->program, "UserVec3"); @@ -1255,6 +1278,7 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode p->tex_Texture_ReflectMask = -1; p->tex_Texture_ReflectCube = -1; p->tex_Texture_BounceGrid = -1; + // bind the texture samplers in use sampler = 0; if (p->loc_Texture_First >= 0) {p->tex_Texture_First = sampler;qglUniform1i(p->loc_Texture_First , sampler);sampler++;} if (p->loc_Texture_Second >= 0) {p->tex_Texture_Second = sampler;qglUniform1i(p->loc_Texture_Second , sampler);sampler++;} @@ -1285,6 +1309,14 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode if (p->loc_Texture_ReflectMask >= 0) {p->tex_Texture_ReflectMask = sampler;qglUniform1i(p->loc_Texture_ReflectMask , sampler);sampler++;} if (p->loc_Texture_ReflectCube >= 0) 
{p->tex_Texture_ReflectCube = sampler;qglUniform1i(p->loc_Texture_ReflectCube , sampler);sampler++;} if (p->loc_Texture_BounceGrid >= 0) {p->tex_Texture_BounceGrid = sampler;qglUniform1i(p->loc_Texture_BounceGrid , sampler);sampler++;} + // get the uniform block indices so we can bind them + p->ubiloc_Skeletal_Transform12_UniformBlock = qglGetUniformBlockIndex(p->program, "Skeletal_Transform12_UniformBlock"); + // clear the uniform block bindings + p->ubibind_Skeletal_Transform12_UniformBlock = -1; + // bind the uniform blocks in use + ubibind = 0; + if (p->ubiloc_Skeletal_Transform12_UniformBlock >= 0) {p->ubibind_Skeletal_Transform12_UniformBlock = ubibind;qglUniformBlockBinding(p->program, p->ubiloc_Skeletal_Transform12_UniformBlock, ubibind);ubibind++;} + // we're done compiling and setting up the shader, at least until it is used CHECKGLERROR Con_DPrintf("^5GLSL shader %s compiled (%i textures).\n", permutationname, sampler); } @@ -2031,6 +2063,7 @@ void R_SetupShader_DepthOrShadow(qboolean notrippy, qboolean depthrgb, qboolean case RENDERPATH_GL20: case RENDERPATH_GLES2: R_SetupShader_SetPermutationGLSL(SHADERMODE_DEPTH_OR_SHADOW, permutation); + if (r_glsl_permutation->ubiloc_Skeletal_Transform12_UniformBlock >= 0 && rsurface.batchskeletaltransform3x4buffer) qglBindBufferRange(GL_UNIFORM_BUFFER, r_glsl_permutation->ubibind_Skeletal_Transform12_UniformBlock, rsurface.batchskeletaltransform3x4buffer->bufferobject, rsurface.batchskeletaltransform3x4offset, rsurface.batchskeletaltransform3x4size); break; case RENDERPATH_GL13: case RENDERPATH_GLES1: @@ -2685,9 +2718,10 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting, R_Mesh_PrepareVertices_Mesh(rsurface.batchnumvertices, rsurface.batchvertexmesh, rsurface.batchvertexmeshbuffer); } // this has to be after RSurf_PrepareVerticesForBatch - if (rsurface.batchskeletaltransform3x4) + if (rsurface.batchskeletaltransform3x4buffer) permutation |= SHADERPERMUTATION_SKELETAL; 
R_SetupShader_SetPermutationGLSL(mode, permutation); + if (r_glsl_permutation->ubiloc_Skeletal_Transform12_UniformBlock >= 0 && rsurface.batchskeletaltransform3x4buffer) qglBindBufferRange(GL_UNIFORM_BUFFER, r_glsl_permutation->ubibind_Skeletal_Transform12_UniformBlock, rsurface.batchskeletaltransform3x4buffer->bufferobject, rsurface.batchskeletaltransform3x4offset, rsurface.batchskeletaltransform3x4size); if (r_glsl_permutation->loc_ModelToReflectCube >= 0) {Matrix4x4_ToArrayFloatGL(&rsurface.matrix, m16f);qglUniformMatrix4fv(r_glsl_permutation->loc_ModelToReflectCube, 1, false, m16f);} if (mode == SHADERMODE_LIGHTSOURCE) { @@ -2828,8 +2862,6 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting, } } if (r_glsl_permutation->tex_Texture_BounceGrid >= 0) R_Mesh_TexBind(r_glsl_permutation->tex_Texture_BounceGrid, r_shadow_bouncegridtexture); - if (r_glsl_permutation->loc_Skeletal_Transform12 >= 0 && rsurface.batchskeletalnumtransforms > 0) - qglUniform4fv(r_glsl_permutation->loc_Skeletal_Transform12, rsurface.batchskeletalnumtransforms*3, rsurface.batchskeletaltransform3x4); CHECKGLERROR break; case RENDERPATH_GL11: @@ -3952,6 +3984,7 @@ static void gl_main_start(void) r_texture_fogheighttexture = NULL; r_texture_gammaramps = NULL; r_texture_numcubemaps = 0; + r_uniformbufferalignment = 32; r_loaddds = r_texture_dds_load.integer != 0; r_savedds = vid.support.arb_texture_compression && vid.support.ext_texture_compression_s3tc && r_texture_dds_save.integer; @@ -3970,6 +4003,8 @@ static void gl_main_start(void) r_loadnormalmap = true; r_loadgloss = true; r_loadfog = false; + if (vid.support.arb_uniform_buffer_object) + qglGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &r_uniformbufferalignment); break; case RENDERPATH_GL13: case RENDERPATH_GLES1: @@ -3992,6 +4027,7 @@ static void gl_main_start(void) R_AnimCache_Free(); R_FrameData_Reset(); + R_BufferData_Reset(); r_numqueries = 0; r_maxqueries = 0; @@ -4043,6 +4079,7 @@ static void 
gl_main_shutdown(void) { R_AnimCache_Free(); R_FrameData_Reset(); + R_BufferData_Reset(); R_Main_FreeViewCache(); @@ -4136,10 +4173,12 @@ static void gl_main_newmap(void) R_Main_FreeViewCache(); R_FrameData_Reset(); + R_BufferData_Reset(); } void GL_Main_Init(void) { + int i; r_main_mempool = Mem_AllocPool("Renderer", 0, NULL); Cmd_AddCommand("r_glsl_restart", R_GLSL_Restart_f, "unloads GLSL shaders, they will then be reloaded as needed"); @@ -4312,6 +4351,8 @@ void GL_Main_Init(void) Cvar_RegisterVariable(&r_glsl_saturation_redcompensate); Cvar_RegisterVariable(&r_glsl_vertextextureblend_usebothalphas); Cvar_RegisterVariable(&r_framedatasize); + for (i = 0;i < R_BUFFERDATA_COUNT;i++) + Cvar_RegisterVariable(&r_bufferdatasize[i]); if (gamemode == GAME_NEHAHRA || gamemode == GAME_TENEBRAE) Cvar_SetValue("r_fullbrights", 0); R_RegisterModule("GL_Main", gl_main_start, gl_main_shutdown, gl_main_newmap, NULL, NULL); @@ -4497,12 +4538,12 @@ void R_FrameData_Reset(void) } } -static void R_FrameData_Resize(void) +static void R_FrameData_Resize(qboolean mustgrow) { size_t wantedsize; wantedsize = (size_t)(r_framedatasize.value * 1024*1024); wantedsize = bound(65536, wantedsize, 1000*1024*1024); - if (!r_framedata_mem || r_framedata_mem->wantedsize != wantedsize) + if (!r_framedata_mem || r_framedata_mem->wantedsize != wantedsize || mustgrow) { r_framedata_mem_t *newmem = (r_framedata_mem_t *)Mem_Alloc(r_main_mempool, wantedsize); newmem->wantedsize = wantedsize; @@ -4517,7 +4558,7 @@ static void R_FrameData_Resize(void) void R_FrameData_NewFrame(void) { - R_FrameData_Resize(); + R_FrameData_Resize(false); if (!r_framedata_mem) return; // if we ran out of space on the last frame, free the old memory now @@ -4536,6 +4577,7 @@ void R_FrameData_NewFrame(void) void *R_FrameData_Alloc(size_t size) { void *data; + float newvalue; // align to 16 byte boundary - the data pointer is already aligned, so we // only need to ensure the size of every allocation is also aligned @@ -4544,8 
+4586,10 @@ void *R_FrameData_Alloc(size_t size) while (!r_framedata_mem || r_framedata_mem->current + size > r_framedata_mem->size) { // emergency - we ran out of space, allocate more memory - Cvar_SetValueQuick(&r_framedatasize, bound(0.25f, r_framedatasize.value * 2.0f, 128.0f)); - R_FrameData_Resize(); + newvalue = bound(0.25f, r_framedatasize.value * 2.0f, 256.0f); + // this might not actually grow the size (the value is clamped), but we'll allocate another buffer every time + Cvar_SetValueQuick(&r_framedatasize, newvalue); + R_FrameData_Resize(true); } data = r_framedata_mem->data + r_framedata_mem->current; @@ -4582,6 +4626,146 @@ void R_FrameData_ReturnToMark(void) //================================================================================== +// avoid reusing the same buffer objects on consecutive frames +#define R_BUFFERDATA_CYCLE 2 + +typedef struct r_bufferdata_buffer_s +{ + struct r_bufferdata_buffer_s *purge; // older buffer to free on next frame + size_t size; // how much usable space + size_t current; // how much space in use + r_meshbuffer_t *buffer; // the buffer itself +} +r_bufferdata_buffer_t; + +static int r_bufferdata_cycle = 0; // incremented and wrapped each frame +static r_bufferdata_buffer_t *r_bufferdata_buffer[R_BUFFERDATA_CYCLE][R_BUFFERDATA_COUNT]; + +/// frees all dynamic buffers in every cycle slot, including each slot's purge chain +void R_BufferData_Reset(void) +{ + int cycle, type; + r_bufferdata_buffer_t **p, *mem; + for (cycle = 0;cycle < R_BUFFERDATA_CYCLE;cycle++) + { + for (type = 0;type < R_BUFFERDATA_COUNT;type++) + { + // free every buffer chained from this cycle/type slot (index by the loop's cycle, not the current frame's cycle) + p = &r_bufferdata_buffer[cycle][type]; + while (*p) + { + mem = *p; + *p = (*p)->purge; + if (mem->buffer) + R_Mesh_DestroyMeshBuffer(mem->buffer); + Mem_Free(mem); + } + } + } +} + +// resize buffer as needed (this actually makes a new one, the old one will be recycled next frame) +static void R_BufferData_Resize(r_bufferdata_type_t type, qboolean mustgrow) +{ + r_bufferdata_buffer_t *mem = r_bufferdata_buffer[r_bufferdata_cycle][type]; + size_t 
size; + size = (size_t)(r_bufferdatasize[type].value * 1024*1024); + size = bound(65536, size, 512*1024*1024); + if (!mem || mem->size != size || mustgrow) + { + mem = (r_bufferdata_buffer_t *)Mem_Alloc(r_main_mempool, sizeof(*mem)); + mem->size = size; + mem->current = 0; + if (type == R_BUFFERDATA_VERTEX) + mem->buffer = R_Mesh_CreateMeshBuffer(NULL, mem->size, "dynamicbuffervertex", false, false, true, false); + else if (type == R_BUFFERDATA_INDEX16) + mem->buffer = R_Mesh_CreateMeshBuffer(NULL, mem->size, "dynamicbufferindex16", true, false, true, true); + else if (type == R_BUFFERDATA_INDEX32) + mem->buffer = R_Mesh_CreateMeshBuffer(NULL, mem->size, "dynamicbufferindex32", true, false, true, false); + else if (type == R_BUFFERDATA_UNIFORM) + mem->buffer = R_Mesh_CreateMeshBuffer(NULL, mem->size, "dynamicbufferuniform", false, true, true, false); + mem->purge = r_bufferdata_buffer[r_bufferdata_cycle][type]; + r_bufferdata_buffer[r_bufferdata_cycle][type] = mem; + } +} + +void R_BufferData_NewFrame(void) +{ + int type; + r_bufferdata_buffer_t **p, *mem; + // cycle to the next frame's buffers + r_bufferdata_cycle = (r_bufferdata_cycle + 1) % R_BUFFERDATA_CYCLE; + // if we ran out of space on the last time we used these buffers, free the old memory now + for (type = 0;type < R_BUFFERDATA_COUNT;type++) + { + if (r_bufferdata_buffer[r_bufferdata_cycle][type]) + { + R_BufferData_Resize(type, false); + // free all but the head buffer, this is how we recycle obsolete + // buffers after they are no longer in use + p = &r_bufferdata_buffer[r_bufferdata_cycle][type]->purge; + while (*p) + { + mem = *p; + *p = (*p)->purge; + if (mem->buffer) + R_Mesh_DestroyMeshBuffer(mem->buffer); + Mem_Free(mem); + } + // reset the current offset + r_bufferdata_buffer[r_bufferdata_cycle][type]->current = 0; + } + } +} + +r_meshbuffer_t *R_BufferData_Store(size_t datasize, void *data, r_bufferdata_type_t type, int *returnbufferoffset, qboolean allowfail) +{ + r_bufferdata_buffer_t *mem; + 
int offset = 0; + int padsize; + float newvalue; + + *returnbufferoffset = 0; + + // align size to a byte boundary appropriate for the buffer type, this + // makes all allocations have aligned start offsets + if (type == R_BUFFERDATA_UNIFORM) + padsize = (datasize + r_uniformbufferalignment - 1) & ~(r_uniformbufferalignment - 1); + else + padsize = (datasize + 15) & ~15; + + while (!r_bufferdata_buffer[r_bufferdata_cycle][type] || r_bufferdata_buffer[r_bufferdata_cycle][type]->current + padsize > r_bufferdata_buffer[r_bufferdata_cycle][type]->size) + { + // emergency - we ran out of space, allocate more memory + newvalue = bound(0.25f, r_bufferdatasize[type].value * 2.0f, 256.0f); + // if we're already at the limit, just fail (if allowfail is false we might run out of video ram) + if (newvalue == r_bufferdatasize[type].value && allowfail) + return NULL; + Cvar_SetValueQuick(&r_bufferdatasize[type], newvalue); + R_BufferData_Resize(type, true); + } + + mem = r_bufferdata_buffer[r_bufferdata_cycle][type]; + offset = mem->current; + mem->current += padsize; + + // upload the data to the buffer at the chosen offset + if (offset == 0) + R_Mesh_UpdateMeshBuffer(mem->buffer, NULL, mem->size, false, 0); + R_Mesh_UpdateMeshBuffer(mem->buffer, data, datasize, true, offset); + + // count the usage for stats + r_refdef.stats[r_stat_bufferdatacurrent_vertex + type] = max(r_refdef.stats[r_stat_bufferdatacurrent_vertex + type], (int)mem->current); + r_refdef.stats[r_stat_bufferdatasize_vertex + type] = max(r_refdef.stats[r_stat_bufferdatasize_vertex + type], (int)mem->size); + + // return the buffer offset + *returnbufferoffset = offset; + + return mem->buffer; +} + +//================================================================================== + // LordHavoc: animcache originally written by Echon, rewritten since then /** @@ -4601,14 +4785,17 @@ void R_AnimCache_ClearCache(void) for (i = 0;i < r_refdef.scene.numentities;i++) { ent = r_refdef.scene.entities[i]; - 
ent->animcache_vertex3f = NULL; - ent->animcache_normal3f = NULL; - ent->animcache_svector3f = NULL; - ent->animcache_tvector3f = NULL; - ent->animcache_vertexmesh = NULL; - ent->animcache_vertex3fbuffer = NULL; - ent->animcache_vertexmeshbuffer = NULL; - ent->animcache_skeletaltransform3x4 = NULL; + ent->animcache_vertex3f = NULL; // for shadow geometry + ent->animcache_normal3f = NULL; // for lit geometry + ent->animcache_svector3f = NULL; // for lit geometry + ent->animcache_tvector3f = NULL; // for lit geometry + ent->animcache_vertexmesh = NULL; // interleaved vertex arrays for D3D + ent->animcache_vertex3fbuffer = NULL; // vertex buffer for D3D + ent->animcache_vertexmeshbuffer = NULL; // vertex buffer for D3D + ent->animcache_skeletaltransform3x4 = NULL; // for dynamic batch fallback with r_glsl_skeletal + ent->animcache_skeletaltransform3x4buffer = NULL; // for r_glsl_skeletal + ent->animcache_skeletaltransform3x4offset = 0; + ent->animcache_skeletaltransform3x4size = 0; } } @@ -4771,6 +4958,9 @@ qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qbool R_ConcatTransforms(bonepose[i], model->data_baseboneposeinverse + i * 12, boneposerelative + i * 12); } } + // note: this can fail if the buffer is at the grow limit + ent->animcache_skeletaltransform3x4size = sizeof(float[3][4]) * model->num_bones; + ent->animcache_skeletaltransform3x4buffer = R_BufferData_Store(ent->animcache_skeletaltransform3x4size, ent->animcache_skeletaltransform3x4, R_BUFFERDATA_UNIFORM, &ent->animcache_skeletaltransform3x4offset, true); } else if (ent->animcache_vertex3f) { @@ -6861,7 +7051,7 @@ void R_UpdateVariables(void) switch(vid.renderpath) { case RENDERPATH_GL20: - r_gpuskeletal = r_glsl_skeletal.integer && !r_showsurfaces.integer; // FIXME add r_showsurfaces support to GLSL skeletal! + r_gpuskeletal = vid.support.arb_uniform_buffer_object && r_glsl_skeletal.integer && !r_showsurfaces.integer; // FIXME add r_showsurfaces support to GLSL skeletal! 
case RENDERPATH_D3D9: case RENDERPATH_D3D10: case RENDERPATH_D3D11: @@ -7003,6 +7193,7 @@ void R_RenderView(void) R_AnimCache_ClearCache(); R_FrameData_NewFrame(); + R_BufferData_NewFrame(); /* adjust for stereo display */ if(R_Stereo_Active()) @@ -8250,6 +8441,9 @@ void RSurf_ActiveWorldEntity(void) rsurface.basepolygonfactor = r_refdef.polygonfactor; rsurface.basepolygonoffset = r_refdef.polygonoffset; rsurface.entityskeletaltransform3x4 = NULL; + rsurface.entityskeletaltransform3x4buffer = NULL; + rsurface.entityskeletaltransform3x4offset = 0; + rsurface.entityskeletaltransform3x4size = 0;; rsurface.entityskeletalnumtransforms = 0; rsurface.modelvertex3f = model->surfmesh.data_vertex3f; rsurface.modelvertex3f_vertexbuffer = model->surfmesh.vbo_vertexbuffer; @@ -8380,7 +8574,10 @@ void RSurf_ActiveModelEntity(const entity_render_t *ent, qboolean wantnormals, q rsurface.basepolygonoffset += r_polygonoffset_submodel_offset.value; } // if the animcache code decided it should use the shader path, skip the deform step - rsurface.entityskeletaltransform3x4 = ent->animcache_vertex3f ? NULL : ent->animcache_skeletaltransform3x4; + rsurface.entityskeletaltransform3x4 = ent->animcache_skeletaltransform3x4; + rsurface.entityskeletaltransform3x4buffer = ent->animcache_skeletaltransform3x4buffer; + rsurface.entityskeletaltransform3x4offset = ent->animcache_skeletaltransform3x4offset; + rsurface.entityskeletaltransform3x4size = ent->animcache_skeletaltransform3x4size; rsurface.entityskeletalnumtransforms = rsurface.entityskeletaltransform3x4 ? 
model->num_bones : 0; if (model->surfmesh.isanimated && model->AnimateVertices && !rsurface.entityskeletaltransform3x4) { @@ -8592,6 +8789,9 @@ void RSurf_ActiveCustomEntity(const matrix4x4_t *matrix, const matrix4x4_t *inve rsurface.basepolygonfactor = r_refdef.polygonfactor; rsurface.basepolygonoffset = r_refdef.polygonoffset; rsurface.entityskeletaltransform3x4 = NULL; + rsurface.entityskeletaltransform3x4buffer = NULL; + rsurface.entityskeletaltransform3x4offset = 0; + rsurface.entityskeletaltransform3x4size = 0; rsurface.entityskeletalnumtransforms = 0; r_refdef.stats[r_stat_batch_entitycustom_count]++; r_refdef.stats[r_stat_batch_entitycustom_surfaces] += 1; @@ -9120,6 +9320,9 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const rsurface.batchelement3s_indexbuffer = rsurface.modelelement3s_indexbuffer; rsurface.batchelement3s_bufferoffset = rsurface.modelelement3s_bufferoffset; rsurface.batchskeletaltransform3x4 = rsurface.entityskeletaltransform3x4; + rsurface.batchskeletaltransform3x4buffer = rsurface.entityskeletaltransform3x4buffer; + rsurface.batchskeletaltransform3x4offset = rsurface.entityskeletaltransform3x4offset; + rsurface.batchskeletaltransform3x4size = rsurface.entityskeletaltransform3x4size; rsurface.batchskeletalnumtransforms = rsurface.entityskeletalnumtransforms; // if any dynamic vertex processing has to occur in software, we copy the @@ -9245,6 +9448,9 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const rsurface.batchelement3s = NULL; rsurface.batchelement3s_indexbuffer = NULL; rsurface.batchelement3s_bufferoffset = 0; + rsurface.batchskeletaltransform3x4buffer = NULL; + rsurface.batchskeletaltransform3x4offset = 0; + rsurface.batchskeletaltransform3x4size = 0; // we'll only be setting up certain arrays as needed if (batchneed & (BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | BATCHNEED_VERTEXMESH_VERTEXCOLOR | 
BATCHNEED_VERTEXMESH_TEXCOORD | BATCHNEED_VERTEXMESH_LIGHTMAP)) rsurface.batchvertexmesh = (r_vertexmesh_t *)R_FrameData_Alloc(batchnumvertices * sizeof(r_vertexmesh_t)); diff --git a/glquake.h b/glquake.h index 30b2512d..1802909d 100644 --- a/glquake.h +++ b/glquake.h @@ -676,6 +676,53 @@ extern void (GLAPIENTRY *qglDrawBuffersARB)(GLsizei n, const GLenum *bufs); #define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT 0x8C4F #endif +// GL_ARB_uniform_buffer_object +#ifndef GL_UNIFORM_BUFFER +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFFu 
+#endif +extern void (GLAPIENTRY *qglGetUniformIndices)(GLuint program, GLsizei uniformCount, const char** uniformNames, GLuint* uniformIndices); +extern void (GLAPIENTRY *qglGetActiveUniformsiv)(GLuint program, GLsizei uniformCount, const GLuint* uniformIndices, GLenum pname, GLint* params); +extern void (GLAPIENTRY *qglGetActiveUniformName)(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei* length, char* uniformName); +extern GLuint (GLAPIENTRY *qglGetUniformBlockIndex)(GLuint program, const char* uniformBlockName); +extern void (GLAPIENTRY *qglGetActiveUniformBlockiv)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint* params); +extern void (GLAPIENTRY *qglGetActiveUniformBlockName)(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei* length, char* uniformBlockName); +extern void (GLAPIENTRY *qglBindBufferRange)(GLenum target, GLuint index, GLuint buffer, GLintptrARB offset, GLsizeiptrARB size); +extern void (GLAPIENTRY *qglBindBufferBase)(GLenum target, GLuint index, GLuint buffer); +extern void (GLAPIENTRY *qglGetIntegeri_v)(GLenum target, GLuint index, GLint* data); +extern void (GLAPIENTRY *qglUniformBlockBinding)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); + extern void (GLAPIENTRY *qglScissor)(GLint x, GLint y, GLsizei width, GLsizei height); extern void (GLAPIENTRY *qglClearColor)(GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); diff --git a/model_shared.c b/model_shared.c index d4aabf2f..37be74af 100644 --- a/model_shared.c +++ b/model_shared.c @@ -1237,19 +1237,19 @@ static void Mod_ShadowMesh_CreateVBOs(shadowmesh_t *mesh, mempool_t *mempool) // upload r_vertexmesh_t array as a buffer if (mesh->vertexmesh && !mesh->vertexmeshbuffer) - mesh->vertexmeshbuffer = R_Mesh_CreateMeshBuffer(mesh->vertexmesh, mesh->numverts * sizeof(*mesh->vertexmesh), loadmodel->name, false, false, false); + mesh->vertexmeshbuffer = R_Mesh_CreateMeshBuffer(mesh->vertexmesh, mesh->numverts * 
sizeof(*mesh->vertexmesh), loadmodel->name, false, false, false, false); // upload vertex3f array as a buffer if (mesh->vertex3f && !mesh->vertex3fbuffer) - mesh->vertex3fbuffer = R_Mesh_CreateMeshBuffer(mesh->vertex3f, mesh->numverts * sizeof(float[3]), loadmodel->name, false, false, false); + mesh->vertex3fbuffer = R_Mesh_CreateMeshBuffer(mesh->vertex3f, mesh->numverts * sizeof(float[3]), loadmodel->name, false, false, false, false); // upload short indices as a buffer if (mesh->element3s && !mesh->element3s_indexbuffer) - mesh->element3s_indexbuffer = R_Mesh_CreateMeshBuffer(mesh->element3s, mesh->numtriangles * sizeof(short[3]), loadmodel->name, true, false, true); + mesh->element3s_indexbuffer = R_Mesh_CreateMeshBuffer(mesh->element3s, mesh->numtriangles * sizeof(short[3]), loadmodel->name, true, false, false, true); // upload int indices as a buffer if (mesh->element3i && !mesh->element3i_indexbuffer && !mesh->element3s) - mesh->element3i_indexbuffer = R_Mesh_CreateMeshBuffer(mesh->element3i, mesh->numtriangles * sizeof(int[3]), loadmodel->name, true, false, false); + mesh->element3i_indexbuffer = R_Mesh_CreateMeshBuffer(mesh->element3i, mesh->numtriangles * sizeof(int[3]), loadmodel->name, true, false, false, false); // vertex buffer is several arrays and we put them in the same buffer // @@ -1272,7 +1272,7 @@ static void Mod_ShadowMesh_CreateVBOs(shadowmesh_t *mesh, mempool_t *mempool) if (mesh->tvector3f ) memcpy(mem + mesh->vbooffset_tvector3f , mesh->tvector3f , mesh->numverts * sizeof(float[3])); if (mesh->normal3f ) memcpy(mem + mesh->vbooffset_normal3f , mesh->normal3f , mesh->numverts * sizeof(float[3])); if (mesh->texcoord2f ) memcpy(mem + mesh->vbooffset_texcoord2f , mesh->texcoord2f , mesh->numverts * sizeof(float[2])); - mesh->vbo_vertexbuffer = R_Mesh_CreateMeshBuffer(mem, size, "shadowmesh", false, false, false); + mesh->vbo_vertexbuffer = R_Mesh_CreateMeshBuffer(mem, size, "shadowmesh", false, false, false, false); Mem_Free(mem); } } @@ 
-3044,19 +3044,19 @@ void Mod_BuildVBOs(void) // upload r_vertexmesh_t array as a buffer if (loadmodel->surfmesh.vertexmesh && !loadmodel->surfmesh.vertexmeshbuffer) - loadmodel->surfmesh.vertexmeshbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.vertexmesh, loadmodel->surfmesh.num_vertices * sizeof(*loadmodel->surfmesh.vertexmesh), loadmodel->name, false, false, false); + loadmodel->surfmesh.vertexmeshbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.vertexmesh, loadmodel->surfmesh.num_vertices * sizeof(*loadmodel->surfmesh.vertexmesh), loadmodel->name, false, false, false, false); // upload vertex3f array as a buffer if (loadmodel->surfmesh.data_vertex3f && !loadmodel->surfmesh.vertex3fbuffer) - loadmodel->surfmesh.vertex3fbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.data_vertex3f, loadmodel->surfmesh.num_vertices * sizeof(float[3]), loadmodel->name, false, false, false); + loadmodel->surfmesh.vertex3fbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.data_vertex3f, loadmodel->surfmesh.num_vertices * sizeof(float[3]), loadmodel->name, false, false, false, false); // upload short indices as a buffer if (loadmodel->surfmesh.data_element3s && !loadmodel->surfmesh.data_element3s_indexbuffer) - loadmodel->surfmesh.data_element3s_indexbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.data_element3s, loadmodel->surfmesh.num_triangles * sizeof(short[3]), loadmodel->name, true, false, true); + loadmodel->surfmesh.data_element3s_indexbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.data_element3s, loadmodel->surfmesh.num_triangles * sizeof(short[3]), loadmodel->name, true, false, false, true); // upload int indices as a buffer if (loadmodel->surfmesh.data_element3i && !loadmodel->surfmesh.data_element3i_indexbuffer && !loadmodel->surfmesh.data_element3s) - loadmodel->surfmesh.data_element3i_indexbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.data_element3i, loadmodel->surfmesh.num_triangles * sizeof(int[3]), loadmodel->name, true, false, 
false); + loadmodel->surfmesh.data_element3i_indexbuffer = R_Mesh_CreateMeshBuffer(loadmodel->surfmesh.data_element3i, loadmodel->surfmesh.num_triangles * sizeof(int[3]), loadmodel->name, true, false, false, false); // only build a vbo if one has not already been created (this is important for brush models which load specially) // vertex buffer is several arrays and we put them in the same buffer @@ -3088,7 +3088,7 @@ void Mod_BuildVBOs(void) if (loadmodel->surfmesh.data_lightmapcolor4f ) memcpy(mem + loadmodel->surfmesh.vbooffset_lightmapcolor4f , loadmodel->surfmesh.data_lightmapcolor4f , loadmodel->surfmesh.num_vertices * sizeof(float[4])); if (loadmodel->surfmesh.data_skeletalindex4ub ) memcpy(mem + loadmodel->surfmesh.vbooffset_skeletalindex4ub , loadmodel->surfmesh.data_skeletalindex4ub , loadmodel->surfmesh.num_vertices * sizeof(unsigned char[4])); if (loadmodel->surfmesh.data_skeletalweight4ub ) memcpy(mem + loadmodel->surfmesh.vbooffset_skeletalweight4ub , loadmodel->surfmesh.data_skeletalweight4ub , loadmodel->surfmesh.num_vertices * sizeof(unsigned char[4])); - loadmodel->surfmesh.vbo_vertexbuffer = R_Mesh_CreateMeshBuffer(mem, size, loadmodel->name, false, false, false); + loadmodel->surfmesh.vbo_vertexbuffer = R_Mesh_CreateMeshBuffer(mem, size, loadmodel->name, false, false, false, false); Mem_Free(mem); } } diff --git a/model_shared.h b/model_shared.h index 4a50833f..1604c5fb 100644 --- a/model_shared.h +++ b/model_shared.h @@ -131,6 +131,7 @@ typedef struct r_meshbuffer_s void *devicebuffer; // Direct3D size_t size; qboolean isindexbuffer; + qboolean isuniformbuffer; qboolean isdynamic; qboolean isindex16; char name[MAX_QPATH]; diff --git a/render.h b/render.h index d567b542..3fbdcde1 100644 --- a/render.h +++ b/render.h @@ -166,16 +166,45 @@ int R_CullBoxCustomPlanes(const vec3_t mins, const vec3_t maxs, int numplanes, c #include "meshqueue.h" +/// free all R_FrameData memory void R_FrameData_Reset(void); +/// prepare for a new frame, recycles old 
buffers if a resize occurred previously void R_FrameData_NewFrame(void); +/// allocate some temporary memory for your purposes void *R_FrameData_Alloc(size_t size); +/// allocate some temporary memory and copy this data into it void *R_FrameData_Store(size_t size, void *data); +/// set a marker that allows you to discard the following temporary memory allocations void R_FrameData_SetMark(void); +/// discard recent memory allocations (rewind to marker) void R_FrameData_ReturnToMark(void); +/// enum of the various types of hardware buffer object used in rendering +/// note that the r_bufferdatasize[] array must be maintained to match this +typedef enum r_bufferdata_type_e +{ + R_BUFFERDATA_VERTEX, /// vertex buffer + R_BUFFERDATA_INDEX16, /// index buffer - 16bit (because D3D cares) + R_BUFFERDATA_INDEX32, /// index buffer - 32bit (because D3D cares) + R_BUFFERDATA_UNIFORM, /// uniform buffer + R_BUFFERDATA_COUNT /// how many kinds of buffer we have +} +r_bufferdata_type_t; + +/// free all dynamic vertex/index/uniform buffers +void R_BufferData_Reset(void); +/// begin a new frame (recycle old buffers) +void R_BufferData_NewFrame(void); +/// request space in a vertex/index/uniform buffer for the chosen data, returns the buffer pointer and offset, if allowfail is true it may return NULL if the growth limit has been reached, false will cause it to allocate additional memory despite this (warning: may run out of memory) +r_meshbuffer_t *R_BufferData_Store(size_t size, void *data, r_bufferdata_type_t type, int *returnbufferoffset, qboolean allowfail); + +/// free all R_AnimCache memory void R_AnimCache_Free(void); +/// clear the animcache pointers on all known render entities void R_AnimCache_ClearCache(void); +/// get the skeletal data or cached animated mesh data for an entity (optionally with normals and tangents) qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qboolean wanttangents); +/// generate animcache data for all entities marked 
visible void R_AnimCache_CacheVisibleEntities(void); #include "r_lerpanim.h" @@ -253,6 +282,9 @@ typedef struct rsurfacestate_s // variables int entityskeletalnumtransforms; // how many transforms are used for this mesh float *entityskeletaltransform3x4; // use gpu-skinning shader on this mesh + const r_meshbuffer_t *entityskeletaltransform3x4buffer; // uniform buffer + int entityskeletaltransform3x4offset; + int entityskeletaltransform3x4size; float *modelvertex3f; const r_meshbuffer_t *modelvertex3f_vertexbuffer; size_t modelvertex3f_bufferoffset; @@ -345,6 +377,9 @@ typedef struct rsurfacestate_s size_t batchelement3s_bufferoffset; int batchskeletalnumtransforms; float *batchskeletaltransform3x4; + const r_meshbuffer_t *batchskeletaltransform3x4buffer; // uniform buffer + int batchskeletaltransform3x4offset; + int batchskeletaltransform3x4size; // rendering pass processing arrays in GL11 and GL13 paths float *passcolor4f; const r_meshbuffer_t *passcolor4f_vertexbuffer; diff --git a/shader_glsl.h b/shader_glsl.h index b0993440..3e300d22 100644 --- a/shader_glsl.h +++ b/shader_glsl.h @@ -18,7 +18,7 @@ "# endif\n", "#endif\n", "\n", -"#ifdef GLSL130\n", +"#if defined(GLSL130) || defined(GLSL140)\n", "precision highp float;\n", "# ifdef VERTEX_SHADER\n", "# define dp_varying out\n", @@ -77,7 +77,14 @@ "dp_attribute vec4 Attrib_TexCoord4; // lightmap texcoords\n", "#ifdef USESKELETAL\n", "//uniform mat4 Skeletal_Transform[128];\n", -"uniform vec4 Skeletal_Transform12[768];\n", +"// this is used with glBindBufferRange to bind a uniform block to the name\n", +"// Skeletal_Transform12_UniformBlock, the Skeletal_Transform12 variable is\n", +"// directly accessible without a namespace.\n", +"// explanation: http://www.opengl.org/wiki/Interface_Block_%28GLSL%29#Syntax\n", +"uniform Skeletal_Transform12_UniformBlock\n", +"{\n", +" vec4 Skeletal_Transform12[768];\n", +"};\n", "dp_attribute vec4 Attrib_SkeletalIndex;\n", "dp_attribute vec4 Attrib_SkeletalWeight;\n", 
"#endif\n", diff --git a/vid.h b/vid.h index 21b8266e..6d30a039 100644 --- a/vid.h +++ b/vid.h @@ -45,7 +45,8 @@ renderpath_t; typedef struct viddef_support_s { qboolean gl20shaders; - qboolean gl20shaders130; + qboolean gl20shaders130; // indicates glBindFragDataLocation is available + qboolean glshaderversion; // typical values: 100 110 120 130 140 ... qboolean amd_texture_texture4; qboolean arb_depth_texture; qboolean arb_draw_buffers; @@ -59,6 +60,7 @@ typedef struct viddef_support_s qboolean arb_texture_gather; qboolean arb_texture_non_power_of_two; qboolean arb_vertex_buffer_object; + qboolean arb_uniform_buffer_object; qboolean ati_separate_stencil; qboolean ext_blend_minmax; qboolean ext_blend_subtract; diff --git a/vid_shared.c b/vid_shared.c index f5987e78..4cac85d8 100644 --- a/vid_shared.c +++ b/vid_shared.c @@ -507,6 +507,17 @@ void (GLAPIENTRY *qglGetQueryObjectivARB)(GLuint qid, GLenum pname, GLint *param void (GLAPIENTRY *qglGetQueryObjectuivARB)(GLuint qid, GLenum pname, GLuint *params); void (GLAPIENTRY *qglSampleCoverageARB)(GLclampf value, GLboolean invert); + +void (GLAPIENTRY *qglGetUniformIndices)(GLuint program, GLsizei uniformCount, const GLchar** uniformNames, GLuint* uniformIndices); +void (GLAPIENTRY *qglGetActiveUniformsiv)(GLuint program, GLsizei uniformCount, const GLuint* uniformIndices, GLenum pname, GLint* params); +void (GLAPIENTRY *qglGetActiveUniformName)(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei* length, GLchar* uniformName); +GLuint (GLAPIENTRY *qglGetUniformBlockIndex)(GLuint program, const GLchar* uniformBlockName); +void (GLAPIENTRY *qglGetActiveUniformBlockiv)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint* params); +void (GLAPIENTRY *qglGetActiveUniformBlockName)(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei* length, GLchar* uniformBlockName); +void (GLAPIENTRY *qglBindBufferRange)(GLenum target, GLuint index, GLuint buffer, GLintptrARB offset, GLsizeiptrARB size); 
+void (GLAPIENTRY *qglBindBufferBase)(GLenum target, GLuint index, GLuint buffer); +void (GLAPIENTRY *qglGetIntegeri_v)(GLenum target, GLuint index, GLint* data); +void (GLAPIENTRY *qglUniformBlockBinding)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); #endif #if _MSC_VER >= 1400 @@ -865,6 +876,21 @@ static dllfunction_t vbofuncs[] = {NULL, NULL} }; +static dllfunction_t ubofuncs[] = +{ + {"glGetUniformIndices" , (void **) &qglGetUniformIndices}, + {"glGetActiveUniformsiv" , (void **) &qglGetActiveUniformsiv}, + {"glGetActiveUniformName" , (void **) &qglGetActiveUniformName}, + {"glGetUniformBlockIndex" , (void **) &qglGetUniformBlockIndex}, + {"glGetActiveUniformBlockiv" , (void **) &qglGetActiveUniformBlockiv}, + {"glGetActiveUniformBlockName", (void **) &qglGetActiveUniformBlockName}, + {"glBindBufferRange" , (void **) &qglBindBufferRange}, + {"glBindBufferBase" , (void **) &qglBindBufferBase}, + {"glGetIntegeri_v" , (void **) &qglGetIntegeri_v}, + {"glUniformBlockBinding" , (void **) &qglUniformBlockBinding}, + {NULL, NULL} +}; + static dllfunction_t arbfbofuncs[] = { {"glIsRenderbuffer" , (void **) &qglIsRenderbuffer}, @@ -997,18 +1023,18 @@ void VID_CheckExtensions(void) if (vid.support.gl20shaders) { - // this one is purely optional, needed for GLSL 1.3 support (#version 130), so we don't even check the return value of GL_CheckExtension - vid.support.gl20shaders130 = GL_CheckExtension("glshaders130", glsl130funcs, "-noglsl130", true); - if(vid.support.gl20shaders130) - { - char *s = (char *) qglGetString(GL_SHADING_LANGUAGE_VERSION); - if(!s || atof(s) < 1.30 - 0.00001) - vid.support.gl20shaders130 = 0; - } - if(vid.support.gl20shaders130) - Con_DPrintf("Using GLSL 1.30\n"); - else - Con_DPrintf("Using GLSL 1.00\n"); + char *s; + // detect what GLSL version is available, to enable features like r_glsl_skeletal and higher quality reliefmapping + vid.support.glshaderversion = 100; + s = (char *) 
qglGetString(GL_SHADING_LANGUAGE_VERSION); + if (s) + vid.support.glshaderversion = (int)(atof(s) * 100.0f + 0.5f); + if (vid.support.glshaderversion < 100) + vid.support.glshaderversion = 100; + Con_DPrintf("Detected GLSL #version %i\n", vid.support.glshaderversion); + // get the glBindFragDataLocation function + if (vid.support.glshaderversion >= 130) + vid.support.gl20shaders130 = GL_CheckExtension("glshaders130", glsl130funcs, "-noglsl130", true); } // GL drivers generally prefer GL_BGRA @@ -1029,6 +1055,7 @@ void VID_CheckExtensions(void) vid.support.arb_texture_non_power_of_two = GL_CheckExtension("GL_ARB_texture_non_power_of_two", NULL, "-notexturenonpoweroftwo", false); #endif vid.support.arb_vertex_buffer_object = GL_CheckExtension("GL_ARB_vertex_buffer_object", vbofuncs, "-novbo", false); + vid.support.arb_uniform_buffer_object = GL_CheckExtension("GL_ARB_uniform_buffer_object", ubofuncs, "-noubo", false); vid.support.ati_separate_stencil = GL_CheckExtension("separatestencil", gl2separatestencilfuncs, "-noseparatestencil", true) || GL_CheckExtension("GL_ATI_separate_stencil", atiseparatestencilfuncs, "-noseparatestencil", false); vid.support.ext_blend_minmax = GL_CheckExtension("GL_EXT_blend_minmax", blendequationfuncs, "-noblendminmax", false); vid.support.ext_blend_subtract = GL_CheckExtension("GL_EXT_blend_subtract", blendequationfuncs, "-noblendsubtract", false);