Fastest 2D frame rate possible with android NDK, my try included, better options available?
I used the NDK and OpenGL ES 2.0 to display a frame as a texture on a GL_TRIANGLE_STRIP. This was done on a HTC Desire, same hardware as Nexus One. I tried to load multiple GL_RGBA textures and switch between the textures, because the normal fill rate with a single texture was disappointingly low:
- 1 texture: 4.78 fps
- 2 textures: 19.68 fps
- 3 textures: 20.18 fps
- 4 textures: 28.52 fps
- 5 textures: 29.01 fps
- 6 textures: 30.32 fps
I think even 30.32 fps RGBA is still too slow.
So is this the way to go to achieve the fastest 2D frame rate (with same quality)? Any suggestions to speed it up?
Here is the relevant code, it is based on the hello-gl2 NDK example:
=== GL2JNIView.java :
init(false, 0, 0);
ConfigChooser(5, 6, 5, 0, depth, stencil);
=== gl_code.cpp :
#include <GLES2/gl2.h>
#include <GLES2/gl2ext.h>
#include <android/log.h>
#include <stdlib.h>
#include <time.h>
typedef unsigned char byte;
static int view_width, view_height;
static byte* framebuffer;
static int framebuffer_size;
static GLuint texture_id[6];
static const char* vertexSrc =
"precision highp float;\n"
"precision highp int;\n"
"attribute vec4 vertexCoords;\n"
"attribute vec2 textureCoords;\n"
"varying vec2 f_textureCoords;\n"
"void main() {\n"
" f_textureCoords = textureCoords;\n"
" gl_Position = vertexCoords;\n"
"}\n";
static const char* fragmentSrc =
"precision highp float;\n"
"precision highp int;\n"
"uniform sampler2D texture;\n"
"varying vec2 f_textureCoords;\n"
"void main() {\n"
" gl_FragColor = texture2D(texture, f_textureCoords);\n"
"}\n";
static GLuint shaderProgram;
static GLint attrib_vertexCoords;
static GLint attrib_textureCoords;
static GLint uniform_texture;
static const GLfloat vertexCoords[] = {-1.0, 1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0};
static const GLfloat textureCoords[] = {0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0};
JNIEXPORT void JNICALL Java_com_android_gl2jni_GL2JNILib_init(JNIEnv * env, jobject obj, jint width, jint height) {
view_width = width;
view_height = height;
framebuffer_size = 4*view_width*view_height;
framebuffer = (byte*)calloc(framebuffer_size, sizeof(byte));
for (int i = 0; i < framebuffer_size; i++) framebuffer[i] = 0;
glViewport(0, 0, view_width, view_height);
glGenTextures(6, &texture_id[0]);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture_id[0]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view_width, view_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, texture_id[1]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view_width, view_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, texture_id[2]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view_width, view_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer);
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_2D, texture_id[3]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view_width, view_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer);
glActiveTexture(GL_TEXTURE4);
glBindTexture(GL_TEXTURE_2D, texture_id[4]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view_width, view_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer);
glActiveTexture(GL_TEXTURE5);
glBindTexture(GL_TEXTURE_2D, texture_id[5]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view_width, view_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer);
shaderProgram = glCreateProgram();
GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertexShader, 1, &vertexSrc, NULL);
glCompileShader(vertexShader);
glAttachShader(shaderProgram, vertexShader);
GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragmentShader, 1, &fragmentSrc, NULL);
glCompileShader(fragmentShader);
glAttachShader(shaderProgram, fragmentShader);
glLinkProgram(shaderProgram);
glUseProgram(shaderProgram);
uniform_texture = glGetUniformLocation(shaderProgram, "texture");
glUniform1i(uniform_texture, 0);
attrib_vertexCoords = glGetAttribLocation(shaderProgram, "vertexCoords");
glEnableVertexAttribArray(attrib_vertexCoords);
glVertexAttribPointer(attrib_vertexCoords, 2, GL_FLOAT, GL_FALSE, 0, vertexCoords);
attrib_textureCoords = glGetAttribLocation(shaderProgram, "textureCoords");
glEnableVertexAttribArray(attrib_textureCoords);
glVertexAttribPointer(attrib_textureCoords, 2, GL_FLOAT, GL_FALSE, 0, textureCoords);
}
JNIEXPORT void JNICALL Java_com_android_gl2jni_GL2JNILib_step(JNIEnv * env, jobject obj) {
static int frame_count = 0;
static clock_t last_time = clock();
static int last_frame_count = 0;
frame_count++;
if (clock()-last_time > 1e7) {
__android_log_print(ANDROID_LOG_INFO, "libgl2jni", "fps: %f", ((float)frame_count-last_frame_count)/(clock()-last_time)*1e6);
last_time = clock();
last_frame_count = frame_count;
}
static byte val = 0;
val++;
if (val == 256) val = 0;
for (int i = 0; i < framebuffer_size; i++) framebuffer[i] = val;
int tst = frame_count%6;
if (tst == 0) {
glActiveTexture(GL_TEXTURE0);
} else if (tst == 1) {
glActiveTexture(GL_TEXTURE1);
} else if (tst == 2) {
glActiveTexture(GL_TEXTURE2);
} else if (tst == 3) {
glActiveTexture(GL_TEXTURE3);
} else if (tst == 4) {
glActiveTexture(GL_TEXTURE4);
} else if (tst == 5) {
glActiveTexture(GL_TEXTURE5);
}
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view_width, view_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
Fastest possible onscreen frame rate is effectively capped by screen refresh rate, which is vendor-specific. My guess would be at least 60 Hz (60 frames per second). Off-screen rendering is not capped by refresh-rate and depends on the intensity of computing you are performing. Endless loop with some gl code may run significantly faster than 60 Hz, or, for that matter, slower.
I realize your question is rather old, and you've likely either solved it or moved onto something else, but I'll give a suggestion in case if anyone else comes across this.
First of all,
glTexImage2D
requires the graphics subsystem to perform a memory free and reallocation of the texture object every time you call it, since the texture parameters can change between calls. An optimized driver might look at the width, height and format, and if they are all the same then the reallocation could be skipped, but it's not likely that the Android driver implementers are actually doing this.To avoid the texture reallocation completely, you can use
glTexSubImage2D
to replace the complete bitmap, or just a portion of it. If you combine this with your above texture buffering scheme, you should see a fairly large speed increase. You could even extend this to detect the modified areas of your display and only update the rectangular portions that have changed between frames.To summarize, change your texture initialization code to call
glTexImage2D
with a NULL bitmap pointer, so OpenGL only allocates the memory for the texture and doesn't actually copy any data into it like so:Then update every frame in your game loop with: