#!/usr/bin/python

# Purpose: Determine performance curves for various methods of pushing
#          triangles and quads through the OpenGL pipeline

# Copyright (c) 2004-2006, Geoff Broadwell; this script is released
# as open source and may be distributed and modified under the terms
# of either the Artistic License or the GNU General Public License,
# in the same manner as Perl itself.  These licenses should have been
# distributed to you as part of your Perl distribution, and can be
# read using `perldoc perlartistic` and `perldoc perlgpl` respectively.

# Rewritten in Python by Bob Free
# ctypes enhancements by Aleksandar Samardzic
# float division suggestion by Taro Ogawa

# Future statements must come before every other import in the module.
from __future__ import division
from __future__ import print_function

from OpenGL.GLUT import *
from OpenGL.GLU import *
from OpenGL.GL import *

import time
import sys
import math
import ctypes

import PyBench

VERSION = '0.1.24'


### MISC GLOBALS

MIN_FRAMES = 0
MIN_SECONDS = 0
conf = 0
app = 0
gl_info = 0
w = 0
h = 0
test = 0            # index into `tests` of the test being benchmarked
run = 0             # index into the current test's list of counts
done = 0            # non-zero once every test has been benchmarked
ready = 0           # non-zero once fixup_stats() has post-processed `stats`
showing_graph = 0
empty_time = 0
empty_frames = 0
combos = []
slow = []
fast = []
stats = []
total = []
# NOTE: this module-level list shadows the builtin max(); nothing in this
# file may call the builtin.  The name is kept for compatibility.
max = []
dls = {}
vas = {}


### USER CONFIG

# Primitive sizes (and therefore counts) are integer divisors of
# (A^i * B^j * C^k ...) where good A, B, C, ... are relatively prime;
# this number is used for the draw area height and width and defaults to:
#     2^4 * 3^2 * 5 = 720
# You may also want to get fewer data points across the same range by
# directly using higher powers; for example:
#     16  * 9   * 5 = 720
#
# max_powers = (16 => 1, 9 => 1, 5 => 1);
# Flat sequence of (base, max_power) pairs.
max_powers = (2, 4, 3, 2, 5, 1)

# Maximum quads along each axis for known slow versus usually fast tests;
# chosen to be somewhat reasonable for most common settings of @max_powers
# my $max_count_slow = 60;
max_count_slow = 154
max_count_fast = 154

# Font to use to label graphs
font_style = GLUT_BITMAP_HELVETICA_10


### BENCHMARK INITS

def _set_vertex(data, index, x, y):
    """Store the 2D vertex (x, y) in slot `index` of a ctypes vertex array."""
    data[index][0] = x
    data[index][1] = y


def _make_strip_va(count, size):
    """Build the vertex array shared by the quad-strip and tri-strip tests.

    Each of the `count` rows holds `count + 1` (top, bottom) vertex pairs.
    """
    data = (ctypes.c_float * 2 * (count * (count + 1) * 2))()
    for y in range(count):
        for x in range(count + 1):
            index = (y * (count + 1) + x) * 2
            _set_vertex(data, index, x * size, y * size + size)
            _set_vertex(data, index + 1, x * size, y * size)
    return data


def make_quads_va(count, size):
    """Return a ctypes float[2] array with 4 vertices per quad for a
    count x count grid of quads, each `size` units on a side."""
    data = (ctypes.c_float * 2 * (count * count * 4))()
    for y in range(count):
        for x in range(count):
            index = (y * count + x) * 4
            _set_vertex(data, index, x * size, y * size + size)
            _set_vertex(data, index + 1, x * size, y * size)
            _set_vertex(data, index + 2, x * size + size, y * size)
            _set_vertex(data, index + 3, x * size + size, y * size + size)
    return data


def make_qs_va(count, size):
    """Return the vertex array used by the GL_QUAD_STRIP tests."""
    return _make_strip_va(count, size)


def make_tris_va(count, size):
    """Return a ctypes float[2] array with 6 vertices (two triangles) per
    cell for a count x count grid of cells, each `size` units on a side."""
    data = (ctypes.c_float * 2 * (count * count * 6))()
    for y in range(count):
        for x in range(count):
            index = (y * count + x) * 6
            _set_vertex(data, index, x * size, y * size + size)
            _set_vertex(data, index + 1, x * size, y * size)
            _set_vertex(data, index + 2, x * size + size, y * size + size)
            _set_vertex(data, index + 3, x * size + size, y * size + size)
            _set_vertex(data, index + 4, x * size, y * size)
            _set_vertex(data, index + 5, x * size + size, y * size)
    return data


def make_ts_va(count, size):
    """Return the vertex array used by the GL_TRIANGLE_STRIP tests."""
    return _make_strip_va(count, size)


### BENCHMARK METHODS

def draw_empty(count, size):
    """Draw nothing; used to time the bare frame loop."""
    return


def stats_empty(count, size):
    """Return [area, prims, tris, verts] for the empty test: all zero."""
    return [0, 0, 0, 0]


def draw_quads(count, size):
    """Draw a count x count grid of quads in immediate mode."""
    glBegin(GL_QUADS)
    for y in range(count):
        for x in range(count):
            glVertex2f(x * size, y * size + size)
            glVertex2f(x * size, y * size)
            glVertex2f(x * size + size, y * size)
            glVertex2f(x * size + size, y * size + size)
    glEnd()
    return


def draw_quads_va(count, size):
    """Draw the quad grid from the prebuilt 'q_<count>' vertex array."""
    va = vas['q_' + str(count)]
    glVertexPointerf(va)
    glEnableClientState(GL_VERTEX_ARRAY)
    glDrawArrays(GL_QUADS, 0, 4 * count * count)
    glDisableClientState(GL_VERTEX_ARRAY)
    return


def stats_quads(count, size):
    """Return [area, prims, tris, verts] per frame for the quad tests."""
    length = size * count
    area = length * length
    prims = count * count
    tris = 2 * prims
    verts = 4 * prims
    return [area, prims, tris, verts]


def draw_qs(count, size):
    """Draw `count` quad strips of `count` quads each in immediate mode."""
    for y in range(count):
        glBegin(GL_QUAD_STRIP)
        # count + 1 vertex columns are needed for count quads; this matches
        # make_qs_va() and the vertex total reported by stats_qs().
        for x in range(count + 1):
            glVertex2f(x * size, y * size + size)
            glVertex2f(x * size, y * size)
        glEnd()
    return


def draw_qs_va(count, size):
    """Draw the quad strips row by row from the 'qs_<count>' vertex array."""
    va = vas['qs_' + str(count)]
    row = 2 * (count + 1)
    glVertexPointerf(va)
    glEnableClientState(GL_VERTEX_ARRAY)
    for y in range(count):
        glDrawArrays(GL_QUAD_STRIP, y * row, row)
    glDisableClientState(GL_VERTEX_ARRAY)
    return


def draw_qs_dl(count, size):
    """Replay the display list compiled from draw_qs for this count."""
    glCallList(dls['qs_' + str(count)])
    return


def draw_qs_va_dl(count, size):
    """Replay the display list compiled from draw_qs_va for this count."""
    va = vas['qs_' + str(count)]
    glVertexPointerf(va)
    glEnableClientState(GL_VERTEX_ARRAY)
    glCallList(dls['qsv_' + str(count)])
    glDisableClientState(GL_VERTEX_ARRAY)
    return


def stats_qs(count, size):
    """Return [area, prims, tris, verts] per frame for the quad-strip tests."""
    length = size * count
    area = length * length
    prims = count
    tris = 2 * count * prims
    verts = 2 * (count + 1) * prims
    return [area, prims, tris, verts]


def draw_tris(count, size):
    """Draw a count x count grid of triangle pairs in immediate mode."""
    glBegin(GL_TRIANGLES)
    for y in range(count):
        for x in range(count):
            glVertex2f(x * size, y * size + size)
            glVertex2f(x * size, y * size)
            glVertex2f(x * size + size, y * size + size)

            glVertex2f(x * size + size, y * size + size)
            glVertex2f(x * size, y * size)
            glVertex2f(x * size + size, y * size)
    glEnd()
    return


def draw_tris_va(count, size):
    """Draw the triangle grid from the prebuilt 't_<count>' vertex array."""
    va = vas['t_' + str(count)]
    glVertexPointerf(va)
    glEnableClientState(GL_VERTEX_ARRAY)
    glDrawArrays(GL_TRIANGLES, 0, 6 * count * count)
    glDisableClientState(GL_VERTEX_ARRAY)
    return


def stats_tris(count, size):
    """Return [area, prims, tris, verts] per frame for the triangle tests."""
    length = size * count
    area = length * length
    prims = 2 * count * count
    tris = prims
    verts = 3 * prims
    return [area, prims, tris, verts]


def draw_ts(count, size):
    """Draw `count` triangle strips in immediate mode."""
    for y in range(count):
        glBegin(GL_TRIANGLE_STRIP)
        # count + 1 vertex columns, matching make_ts_va() and stats_ts().
        for x in range(count + 1):
            glVertex2f(x * size, y * size + size)
            glVertex2f(x * size, y * size)
        glEnd()
    return


def draw_ts_va(count, size):
    """Draw the tri strips row by row from the 'ts_<count>' vertex array."""
    va = vas['ts_' + str(count)]
    row = 2 * (count + 1)
    glVertexPointerf(va)
    glEnableClientState(GL_VERTEX_ARRAY)
    for y in range(count):
        glDrawArrays(GL_TRIANGLE_STRIP, y * row, row)
    glDisableClientState(GL_VERTEX_ARRAY)
    return


def draw_ts_dl(count, size):
    """Replay the display list compiled from draw_ts for this count."""
    glCallList(dls['ts_' + str(count)])
    return


def draw_ts_va_dl(count, size):
    """Replay the display list compiled from draw_ts_va for this count."""
    va = vas['ts_' + str(count)]
    glVertexPointerf(va)
    glEnableClientState(GL_VERTEX_ARRAY)
    glCallList(dls['tsv_' + str(count)])
    glDisableClientState(GL_VERTEX_ARRAY)
    return


def stats_ts(count, size):
    """Return [area, prims, tris, verts] per frame for the tri-strip tests."""
    length = size * count
    area = length * length
    prims = count
    tris = 2 * count * prims
    verts = 2 * (count + 1) * prims
    return [area, prims, tris, verts]


### BENCHMARK TYPES

# Vertex-array builders, keyed by the prefix used in `vas` keys.
va_types = {
    'q': make_quads_va,
    't': make_tris_va,
    'qs': make_qs_va,
    'ts': make_ts_va,
}

# Draw routines compiled into display lists, keyed by the `dls` key prefix.
dl_types = {
    'qs': draw_qs,
    'ts': draw_ts,
    'qsv': draw_qs_va,
    'tsv': draw_ts_va,
}

tests = [
    # Nick      Draw Routine     Stats Calc   Type      Graph Color      Stipple
    [{'empty': draw_empty},     stats_empty, 'single', [1.0, 1.0, 1.0], 0xFFFF],
    [{'t': draw_tris},          stats_tris,  'slow',   [1.0, 0.0, 0.0], 0xAAAA],
    [{'q': draw_quads},         stats_quads, 'slow',   [1.0, .5, 0.0],  0xAAAA],
    [{'ts': draw_ts},           stats_ts,    'slow',   [1.0, 1.0, 0.0], 0xAAAA],
    [{'qs': draw_qs},           stats_qs,    'slow',   [0.0, 1.0, 0.0], 0xAAAA],
    [{'tsd': draw_ts_dl},       stats_ts,    'fast',   [0.0, 1.0, 1.0], 0xAAAA],
    [{'qsd': draw_qs_dl},       stats_qs,    'fast',   [0.0, 0.0, 1.0], 0xAAAA],
    [{'tv': draw_tris_va},      stats_tris,  'fast',   [.8, 0.0, 0.0],  0xFFFF],
    [{'qv': draw_quads_va},     stats_quads, 'fast',   [.8, .4, 0.0],   0xFFFF],
    [{'tsv': draw_ts_va},       stats_ts,    'fast',   [.8, .8, 0.0],   0xFFFF],
    [{'qsv': draw_qs_va},       stats_qs,    'fast',   [0.0, .8, 0.0],  0xFFFF],
    [{'tsvd': draw_ts_va_dl},   stats_ts,    'fast',   [0.0, .8, .8],   0xFFFF],
    [{'qsvd': draw_qs_va_dl},   stats_qs,    'fast',   [0.0, 0.0, .8],  0xFFFF],
]


def fixup_stats():
    """Post-process the raw rows in `stats` once benchmarking has finished.

    Removes the leading 'empty' rows (remembering the first one's time and
    frame count), extends every remaining row from
    [name, count, secs, frames, pix, prims, tris, verts] with
    [fps, pix/s, prims/s, tris/s, verts/s, work/s], converts the per-frame
    counts to totals, and fills the `total` and `max` summary rows.
    Increments `ready` when done.
    """
    global ready
    global stats
    global total
    global max
    global empty_time
    global empty_frames

    if stats and stats[0][0] == 'empty':
        empty_time = stats[0][2]
        empty_frames = stats[0][3]
        empty_tpf = empty_time / empty_frames
        # Guard on `stats` so an all-empty run cannot index an empty list.
        while stats and stats[0][0] == 'empty':
            del stats[0]
    else:
        empty_time = 0
        empty_frames = 0
        empty_tpf = 0

    # Slice-assign so the shared list objects are reused and a second call
    # cannot grow the rows.
    total[:] = ['totl,', 0] + [0.0] * 12
    max[:] = ['max', 0] + [0.0] * 12
    total_work = 0.0

    for stat in stats:
        [name, count, secs, frames, pixpf, prmpf, tpf, vpf] = stat

        # Subtract out empty loop time, and loop if negative result
        # $time -= $empty_tpf * $frames;
        if secs <= 0:
            # Pad with all six derived columns so the row has the same
            # width as every other row (indices 8..13).
            stat.extend([0] * 6)
            continue

        # Calc "work", the geometric mean of pixels and vertices
        workpf = (pixpf * vpf) ** .5

        # Calc fps
        fps = frames / secs

        # Calc other perf stats
        pixps = pixpf * fps
        prmps = prmpf * fps
        tps = tpf * fps
        vps = vpf * fps
        wps = workpf * fps

        # Add them to stat row
        stat.extend([fps, pixps, prmps, tps, vps, wps])

        # Convert per frame counts to totals
        for i in range(4, 8):
            stat[i] *= frames
        total_work += workpf * frames

        # Update running totals
        for i in range(2, 8):
            total[i] += stat[i]

        # Update running maximums
        for i in range(2, 14):
            if max[i] < stat[i]:
                max[i] = stat[i]

    # Calc averages for totals line: each total divided by total seconds.
    if total[2] > 0:
        for i in range(8, 13):
            total[i] = total[i - 5] / total[2]
        total[13] = total_work / total[2]
    total[1] = 'avg'

    ready += 1
    return


def show_stats():
    """Print the statistics table to stdout.

    Appends the `total` row to `stats` as a side effect; draw_one_stat()
    relies on that row being last.
    """
    global total
    global stats
    global empty_time
    global empty_frames

    basic = ('Name', 'Cnt', 'Time')
    raw = ('Frms', 'Mpix', 'Kprim', 'Ktri', 'Kvert')
    calc = raw
    header = basic + raw + calc
    # Power-of-ten divisor applied to each of the ten numeric columns that
    # follow the time column.
    mags = (0, 6, 3, 3, 3, 0, 6, 3, 3, 3)
    scale = [10 ** i for i in mags]

    g_form = "%9s%-*s%s"
    h_form = '%-5s%3s %6s' + (' %5s' * len(raw)) + '' + (' %5s' * len(calc))
    row_form = ('%-5s%3s %6.3f' + (' %5d' * len(raw)) + ''
                + (' %5d' * len(calc)))

    print(g_form % ('', 6 * len(raw) + 8, 'MEASURED', 'PER SECOND'))
    print(h_form % header)

    empty_stat = ('empty', '1', empty_time, empty_frames,
                  0, 0, 0, 0, 0, 0, 0, 0, 0)
    print(row_form % empty_stat)

    stats.append(total)
    for stat in stats:
        tstat = stat[:]
        for i, divisor in enumerate(scale):
            tstat[i + 3] /= divisor
        print(row_form % tuple(tstat[:13]))
    return


def kilo_mag(num):
    """Return how many whole powers of 1000 fit in `num`, clamped to 0..8
    (the range of prefixes known to mag_char); 0 for non-positive input."""
    if num <= 0:
        return 0
    # log10 is exact for powers of ten, unlike log(num) / log(10).
    mag = int(math.log10(num))
    kilo = int(mag / 3)
    if kilo < 0:
        kilo = 0
    elif kilo > 8:
        kilo = 8
    return kilo


def mag_char(num):
    """Return the SI prefix letter ('' for < 1000) matching `num`."""
    return ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y')[kilo_mag(num)]


def mag_scale(num):
    """Return the power of 1000 by which `num` is divided for display."""
    return 10 ** (3 * kilo_mag(num))


def tick_inc(max, parts=5):
    """Return a tick spacing (1, 2, 5 or 10 times a power of ten) that
    divides the range 0..`max` into roughly `parts` intervals."""
    if max < 1:
        return max / parts
    mag = int(math.log10(max))
    scl = 10 ** (mag - 1)
    inc = max / (scl * parts)
    if inc > 7.5:
        inc = 10
    elif inc > 3.5:
        inc = 5
    elif inc > 1.5:
        inc = 2
    else:
        inc = 1
    return inc * scl


def draw_one_stat(x_loc, y_loc, y_off, x_scale, num):
    """Plot column `num` of every row in `stats` against primitive count.

    One stippled line strip is drawn per test name, using the color and
    stipple pattern from `tests`.  The last row of `stats` (the totals row
    appended by show_stats) is skipped.
    """
    global max
    global h

    y_max = max[num]
    if y_max <= 0:
        # Nothing measurable to plot; also avoids dividing by zero.
        return
    y_scale = (h - 4.0 * y_off) / (2.0 * y_max)

    colors = {}
    stipple = {}
    for item in tests:
        [name] = item[0].keys()
        colors[name] = item[-2]
        stipple[name] = item[-1]

    last = ''
    glEnable(GL_LINE_STIPPLE)
    glBegin(GL_LINE_STRIP)
    for stat in stats[:-1]:
        name = stat[0]
        count = stat[1]
        value = stat[num]
        if name != last:
            # Start a new strip whenever the test name changes.
            glEnd()
            glLineStipple(3, stipple[name])
            glBegin(GL_LINE_STRIP)
            color = colors[name]
            glColor3f(color[0], color[1], color[2])
            last = name
        glVertex2f(count * x_scale + x_loc, value * y_scale + y_loc)
    glEnd()
    glDisable(GL_LINE_STIPPLE)
    return


def _draw_value_ticks(x_axis, x_label, y_off, tick_size, num):
    """Draw labelled value ticks for column `num` along the vertical axis
    at `x_axis` in the upper half of the window."""
    val_max = max[num] / mag_scale(max[num])
    if val_max <= 0:
        return
    y_scale = (h - 4.0 * y_off) / (2.0 * val_max)
    val_inc = tick_inc(val_max, 5)
    value = 0
    while value < val_max:
        y_tick = (value * y_scale) + y_off
        glBegin(GL_LINES)
        glVertex2f(x_axis, y_tick + h / 2.0)
        glVertex2f(x_axis - tick_size, y_tick + h / 2.0)
        glEnd()
        if value:
            PyBench.draw_string(font_style, repr(value),
                                x_label, y_tick + h / 2.0)
        value = value + val_inc


def draw_stats():
    """Draw the result graphs (pixels/sec, vertices/sec, work/sec) and the
    color key.  Does nothing until fixup_stats() has set `ready`."""
    global ready
    global showing_graph
    if not ready:
        return

    global w
    global h
    global slow
    global fast
    global tests
    global max

    # Graph config
    x_off = 10
    y_off = 10
    tick_size = 3
    val_space = 50
    key_size = 20
    x_count = fast[-1] if fast else slow[-1]
    x_scale = (w - 4.0 * x_off) / (2.0 * x_count)
    key_scale = (h - 4.0 * y_off) / (2.0 * len(tests))

    # Get a fresh black frame for graphing
    glClearColor(0, 0, 0, 1)
    start_frame()

    # Use antialiased lines
    glEnable(GL_BLEND)
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
    glEnable(GL_LINE_SMOOTH)
    glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)

    # Draw axis ticks
    glColor3f(1, 1, 1)
    glBegin(GL_LINES)
    for count in ([0] + slow + fast):
        x_tick = count * x_scale + x_off
        glVertex2f(x_tick, y_off)
        glVertex2f(x_tick, y_off - tick_size)
        glVertex2f(x_tick, y_off + h / 2.0)
        glVertex2f(x_tick, y_off + h / 2.0 - tick_size)
        glVertex2f(x_tick + w / 2.0, y_off + h / 2.0)
        glVertex2f(x_tick + w / 2.0, y_off + h / 2.0 - tick_size)
    glEnd()

    x_tick = x_off + 3
    val_max = int((h / 2.0 - 2.0 * y_off) / val_space)

    # Work
    for value in range(val_max):
        y_tick = value * val_space + y_off
        glBegin(GL_LINES)
        glVertex2f(x_off, y_tick)
        glVertex2f(x_off - tick_size, y_tick)
        glEnd()

    # Pixels
    _draw_value_ticks(x_off, x_tick, y_off, tick_size, 9)

    # Vertices
    _draw_value_ticks(x_off + w / 2.0, x_tick + w / 2.0, y_off, tick_size, 12)

    # Draw axes
    glBegin(GL_LINE_STRIP)
    glVertex2f(x_off, h / 2.0 - y_off)
    glVertex2f(x_off, y_off)
    glVertex2f(w / 2.0 - x_off, y_off)
    glEnd()
    glBegin(GL_LINE_STRIP)
    glVertex2f(x_off, h - y_off)
    glVertex2f(x_off, h / 2.0 + y_off)
    glVertex2f(w / 2.0 - x_off, h / 2.0 + y_off)
    glEnd()
    glBegin(GL_LINE_STRIP)
    glVertex2f(w / 2.0 + x_off, h - y_off)
    glVertex2f(w / 2.0 + x_off, h / 2.0 + y_off)
    glVertex2f(w - x_off, h / 2.0 + y_off)
    glEnd()

    # Draw color key
    for num, entry in enumerate(tests):
        (name_hash, color, stipple) = (entry[0], entry[-2], entry[-1])
        [name] = name_hash.keys()
        glEnable(GL_LINE_STIPPLE)
        glLineStipple(3, stipple)
        glBegin(GL_LINES)
        glColor3f(color[0], color[1], color[2])
        glVertex2f(x_off + w / 2.0, y_off + num * key_scale)
        glVertex2f(x_off + w / 2.0 + key_size, y_off + num * key_scale)
        glEnd()
        glDisable(GL_LINE_STIPPLE)
        PyBench.draw_string(font_style, name,
                            x_off + w / 2.0 + key_size * 2.0,
                            y_off + num * key_scale)

    # Draw performance graph lines

    # Pixels per second
    draw_one_stat(x_off, y_off + h / 2.0, y_off, x_scale, 9)
    glColor3f(1.0, 1.0, 1.0)
    PyBench.draw_string(font_style, mag_char(max[9]) + " Pixels/Sec",
                        w / 4.0, h - 2.0 * y_off)

    # Vertices per second
    draw_one_stat(x_off + w / 2.0, y_off + h / 2.0, y_off, x_scale, 12)
    glColor3f(1.0, 1.0, 1.0)
    PyBench.draw_string(font_style, mag_char(max[12]) + " Vertices/Sec",
                        3.0 * w / 4.0, h - 2.0 * y_off)

    # "Work" per second, the geometric mean of pixels and vertices
    draw_one_stat(x_off, y_off, y_off, x_scale, 13)
    glColor3f(1.0, 1.0, 1.0)
    PyBench.draw_string(font_style, "Work/Sec",
                        w / 4.0, h / 2.0 - 2.0 * y_off)

    # Show our graph
    end_frame()
    showing_graph = 1
    return


def cleanup():
    """Finish the run: compute statistics, print them, and graph them."""
    fixup_stats()
    show_stats()
    draw_stats()
    return


def display():
    """GLUT display/idle callback: advance the benchmark by one run, then
    perform the one-time cleanup after the last run."""
    global done
    global ready

    if not done:
        benchmark()
    elif not ready:
        cleanup()
    return


def keyboard(key, x, y):
    """GLUT keyboard callback: ESC or Q quits; R redraws the graph once
    benchmarking is done."""
    global done

    # The key may arrive as bytes (Python 3) or str (Python 2).
    if not isinstance(key, str):
        key = key.decode('latin-1')

    if key == '\x1b' or key.upper() == 'Q':
        glutDestroyWindow(app)
        sys.exit(0)
    if done and key.upper() == 'R':
        draw_stats()
    return


def start_frame():
    """Clear the color and depth buffers."""
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
    return


def end_frame():
    """Block until all issued GL commands have completed."""
    glFinish()
    return


def benchmark():
    """Time one (test, count) combination and append its row to `stats`.

    Advances the global `run`/`test` cursors; once every test is finished
    it prints '.' and sets `done`.
    """
    global MIN_FRAMES
    global MIN_SECONDS
    global w
    global h
    global stats
    global tests
    global test
    global done
    global run
    global slow
    global fast

    if test >= len(tests):
        if not done:
            print(".")
        done += 1
        return

    (draw_hash, do_stats, group, color, stipple) = tests[test]
    counts = slow if group in ('single', 'slow') else slow + fast
    if not counts:
        # Nothing to run for this test; move on rather than index [].
        test += 1
        run = 0
        return

    [name] = draw_hash.keys()
    draw = draw_hash[name]

    if not run:
        sys.stdout.write(" " + name)
        sys.stdout.flush()

        # After printing current test name, busy wait for a second
        # so that the terminal can catch up and not do work while
        # the GL timing is in progress
        t = time.time()
        while 1 > time.time() - t:
            continue

    count = counts[run]
    size = w / count

    PyBench.fade_to_white((test + (run / len(counts))) / len(tests))

    run_done = 0
    frames = 0
    start = time.time()

    while not run_done:
        start_frame()
        draw(count, size)
        end_frame()

        frames += 1
        if MIN_FRAMES <= frames and MIN_SECONDS <= (time.time() - start):
            run_done = 1

    glFinish()
    end = time.time()
    secs = end - start

    stats.append([name, count, secs, frames] + do_stats(count, size))

    run += 1
    if run >= len(counts):
        test += 1
        run = 0
    return


def init_display_lists():
    """Compile one display list per (dl_types entry, count) into `dls`."""
    global w
    global h
    global dl_types
    global dls

    sys.stdout.write("Init display lists:")

    # Build a new list; extending `slow` in place would corrupt the global.
    counts = slow + fast
    num_lists = len(dl_types) * len(counts)
    current = glGenLists(num_lists)

    for kind in sorted(dl_types):
        sys.stdout.write(' ' + kind)
        for count in counts:
            dls[kind + '_' + str(count)] = current
            glNewList(current, GL_COMPILE)
            current = current + 1
            dl_types[kind](count, w / count)
            glEndList()
    print(".")
    return


def init_vertex_arrays():
    """Build one vertex array per (va_types entry, count) into `vas`."""
    global w
    global h
    global va_types
    global vas

    sys.stdout.write("Init vertex arrays:")

    # Build a new list; extending `slow` in place would corrupt the global.
    counts = slow + fast
    for kind in sorted(va_types):
        sys.stdout.write(' ' + kind)
        for count in counts:
            data = va_types[kind](count, w / count)
            #va = pack 'f*', @$data;
            vas[kind + '_' + str(count)] = data
    print(".")
    return


def show_user_message():
    """Print the introductory banner, the configuration reported by
    PyBench, and the primitive counts that will be benchmarked."""
    print("""
TRISLAM benchmarks several methods of pushing OpenGL primitives,
testing each method with various primitive counts and sizes.
During the benchmark, the test window will start out black, slowly
brightening to white as testing progresses.  Once benchmarking is
complete, the collected data will be dumped in tabular form.

The configuration for this series of tests will be as follows:

""")

    PyBench.show_basic_config(conf, gl_info, VERSION)

    global slow
    global fast

    print("standard runs: " + "".join(' ' + str(i) for i in slow))
    print("extra fast runs: " + "".join(' ' + str(i) for i in fast))
    print("-" * 79)
    return


#def recurse_combos(base,max_power,rest):
def recurse_combos(params):
    """Return every product of base**power for the (base, max_power) pairs
    given as the flat sequence `params`, with power in 0..max_power."""
    if not len(params):
        return [1]

    base = params[0]
    max_power = params[1]
    # The recursion does not depend on `power`, so compute it once.
    tails = recurse_combos(params[2:])

    combos = []
    for power in range(max_power + 1):
        multiplier = base ** power
        for item in tails:
            combos.append(item * multiplier)
    return combos


def init():
    """Choose the run counts and window size, create the window via
    PyBench, and prebuild the vertex arrays and display lists."""
    global MIN_FRAMES
    global MIN_SECONDS
    global w
    global h
    global conf
    global app
    global gl_info
    global slow
    global fast

    # Figure out primitive counts for each run of each test type
    combos = sorted(set(recurse_combos(max_powers)))
    for i in combos:
        if i <= max_count_slow:
            slow.append(i)
        if i > max_count_slow and i <= max_count_fast:
            fast.append(i)

    # Choose drawing area size to match counts
    h = w = combos[-1]

    # Do the standard init stuff, including command line processing,
    # window creation, and so on
    default_conf = {
        'title': 'Triangle Slammer OpenGL Benchmark',
        'geometry': str(w) + 'x' + str(h),
        'frames': 10,
        'seconds': 1,
    }
    [conf, app, gl_info] = PyBench.basic_init(default_conf)

    # Reduce indirections in inner loops
    MIN_FRAMES = conf['frames']
    MIN_SECONDS = conf['seconds']

    # Let user know what's going on
    show_user_message()

    # Change projection to integer-pixel ortho
    glMatrixMode(GL_PROJECTION)
    glOrtho(0, w, 0, h, -1, 1)
    glMatrixMode(GL_MODELVIEW)

    # Load font for graph labels
    #$font_style = POGLBench::init_bitmap_font($font_file);

    # Make sure GL state is consistent for VA and DL creation
    start_frame()

    # Create vertex arrays and display lists outside timing loop
    init_vertex_arrays()
    init_display_lists()

    # Clean up GL state
    end_frame()
    return


# Main app
init()
print("Benchmarks:", end="")
sys.stdout.flush()

glutDisplayFunc(display)
glutIdleFunc(display)
glutKeyboardFunc(keyboard)
glutMainLoop()