/// LSU EE 4702-1 (Fall 2012), GPU Programming // /// Homework 5 -- SOLUTION // // This version includes some changes no in original assignment. // To get the original assignment issue the following commands // in a check-out of the assignment: // svn update -r397 // // Assignment in: http://www.ece.lsu.edu/koppel/gpup/2012/hw05.pdf // /// Your Name: /// Note: Requires OpenGL 4.3 and CUDA /// Keyboard Commands // /// Object (Eye, Light, Ball) Location or Push // Arrows, Page Up, Page Down // Will move object or push end of spring, depending on mode: // 'e': Move eye. // 'b': Grab top (last segment of) of spring. // 'l': Move light. // // 'B': Release top (last segment of) spring. // /// Eye Direction // Home, End, Delete, Insert // Turn the eye direction. // Home should rotate eye direction up, End should rotate eye // down, Delete should rotate eye left, Insert should rotate eye // right. The eye direction vector is displayed in the upper left. /// Simulation Options // (Also see variables below.) // // 'a' Switch between CPU and GPU physics. // 'i' Cycle three methods of interpenetration handling: // GPU Physics Mode: None, GPU-Prob-1, GPU-Prob-2. // CPU Physics Mode: None, CPU, CPU (last two equivalent). // 'b' Grab free end (last segment) of spring. Once grabbed, can // be controlled with arrow keys (see above). // 'B' Release spring. // 'p' Pause simulation. // 'g' Toggle gravity on and off. // 't' Toggle state of Boolean opt_test, use it as you like. // 'S' Switch between different shaders in reverse direction. // 'F11' Change size of text. // 'F12' Write screenshot to file. /// Variables // Selected program variables can be modified using the keyboard. // Use "Tab" to cycle through the variable to be modified, the // name of the variable is displayed next to "VAR" on the bottom // line of green text. // 'Tab' Cycle to next variable. // '`' Cycle to previous variable. // '+' Increase variable value. // '-' Decrease variable value. // // -- Documented Variables -- // // Segs Per Helix Rev // - The number of segments in one revolution of helix. // A smaller number means fewer primitives. // // Segs Per Wire Rev // - The number of segments in 1 revolution around wire. // A smaller number means fewer primitives. // // Light Intensity // - The light intensity. // #define GL_GLEXT_PROTOTYPES #define GLX_GLXEXT_PROTOTYPES #define GL_GLEXT_LEGACY #include <GL/gl.h> #include <GL/glx.h> #include <GL/glext.h> #include <GL/glxext.h> // NVIDIA has not yet updated their include files. :-( #ifndef GL_ARB_shader_storage_buffer_object #define GL_SHADER_STORAGE_BUFFER 0x90D2 #endif #include <GL/glu.h> #include <GL/freeglut.h> // Include files provided for this course. // #include <gp/util.h> #include <gp/glextfuncs.h> #include <gp/coord.h> #include <gp/shader.h> #include <gp/pstring.h> #include <gp/misc.h> #include <gp/gl-buffer.h> #include <gp/texture-util.h> #include "shapes.h" #include <gp/cuda-util.h> #include "hw5-sol.cuh" // Define storage buffer binding indices and attribute locations. // #define UNIF_IDX_BULDGE_LOC 0 #define UNIF_IDX_BULDGE_DIST_THRESH 1 #define UNIF_IDX_WIRE_RADIUS 2 #define ATTR_IDX_HELIX_INDICES 1 #define SB_COORD 1 #define SB_U 2 #define SB_V 3 enum Shader_Program { SP_Fixed, SP_Geo_Shade1, SP_Geo_Shade2, SP_ENUM_SIZE }; const char* const shader_program[] = { "SP_Fixed", "SP_Geo_Shade1", "SP_Geo_Shade2", "SP_ENUM_SIZE" }; enum GPU_Physics_Method { GP_cpu, GP_cuda, GP_ENUM_SIZE }; const char* const gpu_physics_method_str[] = { "CPU", "CUDA" }; struct Wire_Segment { pCoor position; pQuat orientation; pVect velocity; pVect omega; pVect ctr_to_right_dir; pVect u, v; pVect nu, nv, nw; pCoor pos_left, pos_right; pVect force; pVect torque; }; class World { public: World(pOpenGL_Helper &fb):ogl_helper(fb){init();} void init(); static void render_w(void *moi){ ((World*)moi)->render(); } void render(); void cb_keyboard(); void modelview_update(); // Class providing utilities, such as showing text. // pOpenGL_Helper& ogl_helper; // Class for easy keyboard control of variables. // pVariable_Control variable_control; // Class for showing frame timing. // pFrame_Timer frame_timer; pCoor light_location; float opt_light_intensity; pCoor helix_location; Wire_Segment *helix_end_ws; // Last (end) segment of helix. bool opt_end_fixed; // If true position of "end" of helix is fixed in space. float helix_radius; // Radius of helix. float wire_radius; // Radius of wire forming helix. int seg_per_helix_revolution; int seg_per_wire_revolution; bool coords_stale; bool buffer_objects_stale; PStack<int> helix_indices; GLuint helix_indices_bo; // Coordinates of helix. (Helix runs through center of wire.) // PStack<pCoor> helix_coords; GLuint helix_coords_bo; int helix_coords_size; pCoor *helix_u, *helix_v; GLuint helix_u_bo; GLuint helix_v_bo; PStack<int> wire_surface_indices; GLuint wire_surface_indices_bo; int wire_surface_indices_size; int helix_indices_size; int opt_shader; pShader *sp_fixed; // Fixed functionality. pShader *sp_geo_shade; enum { MI_Eye, MI_Light, MI_Ball, MI_Ball_V, MI_COUNT } opt_move_item; pCoor eye_location; pVect eye_direction; pMatrix modelview; GLuint texture_id_syllabus; // Physics double world_time; float delta_t; bool opt_pause; PStack<Wire_Segment> wire_segments; pVect gravity_accel; pQuat helix_rn_trans; float helix_seg_hlength; float helix_seg_hlength_inv; float opt_spring_constant; float opt_helix_density; bool opt_gravity; int opt_interpen_method; bool opt_test; float helix_seg_mass; float helix_seg_mass_inv; float helix_seg_ma_axis; float helix_seg_ma_perp_axis; void helix_apply_force_at (Wire_Segment *ws, pCoor pos, pVect dir, float magnitude); pVect helix_get_vel_at(Wire_Segment *ws, pCoor pos); float helix_spring_energy; void time_step_cpu(); // CUDA Physics int opt_physics_method; bool cuda_initialized; bool cuda_constants_stale; cudaEvent_t frame_start_ce, frame_stop_ce; int block_size_max; cudaDeviceProp cuda_prop; // Properties of cuda device (GPU, cuda version). cudaFuncAttributes cfa_helix; // Properties of code to run on device. pCUDA_Memory<pVect> helix_position; pCUDA_Memory<pVect> helix_velocity; pCUDA_Memory<pQuat> helix_orientation; pCUDA_Memory<pVect> helix_omega; void cuda_init(); }; void World::init() { coords_stale = true; seg_per_helix_revolution = 80; seg_per_wire_revolution = 20; // variable_control.insert(seg_per_helix_revolution,"Seg Per Helix Rev"); // variable_control.insert(seg_per_wire_revolution,"Seg Per Wire Rev"); buffer_objects_stale = true; helix_coords_bo = 0; wire_surface_indices_bo = 0; eye_location = pCoor(-1.8,6.9,23.4); eye_direction = pVect(0,0,-1); opt_light_intensity = 1.5; light_location = pCoor(12.2,4.0,6.9); opt_end_fixed = false; helix_location = pCoor(0,0,-5); helix_radius = 5; wire_radius = 0.6; variable_control.insert(wire_radius,"Wire Radius"); // variable_control.insert(opt_light_intensity,"Light Intensity"); opt_move_item = MI_Eye; texture_id_syllabus = pBuild_Texture_File("hw05.png",false,255); // Declared like a programmable shader, but used for fixed-functionality. // sp_fixed = new pShader(); const char* const file = "hw5-shdr.cc"; sp_geo_shade = new pShader (file, // File holding shader program. "vs_main_helix();", // Name of vertex shader main routine. "gs_main_helix();", "fs_main_phong();" // Name of fragment shader main routine. ); opt_shader = SP_Geo_Shade1; modelview_update(); opt_physics_method = GP_cuda; opt_spring_constant = 900000; gravity_accel = pVect(0,-.98,0); opt_helix_density = 1; helix_u = helix_v = NULL; variable_control.insert(opt_spring_constant,"Spring Constant"); delta_t = 1.0 / ( 30 * 200 ); opt_pause = false; world_time = time_wall_fp(); opt_gravity = true; opt_interpen_method = 0; opt_test = false; cuda_init(); } void World::modelview_update() { pMatrix_Translate center_eye(-eye_location); pMatrix_Rotation rotate_eye(eye_direction,pVect(0,0,-1)); modelview = rotate_eye * center_eye; } void World::render() { // This routine called whenever window needs to be updated. // Get any waiting keyboard commands. // cb_keyboard(); // Start a timer object used for tuning this code. // frame_timer.frame_start(); glClearColor(0,0,0,0); glClearDepth(1.0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); glEnable(GL_DEPTH_TEST); glDepthFunc(GL_LESS); glShadeModel(GL_SMOOTH); ogl_helper.fbprintf("%s\n",frame_timer.frame_rate_text_get()); ogl_helper.fbprintf ("Eye location: [%5.1f, %5.1f, %5.1f] " "Eye direction: [%+.2f, %+.2f, %+.2f]\n", eye_location.x, eye_location.y, eye_location.z, eye_direction.x, eye_direction.y, eye_direction.z); pVariable_Control_Elt* const cvar = variable_control.current; ogl_helper.fbprintf("VAR %s = %.5f (TAB or '`' to change, +/- to adjust)\n", cvar->name,cvar->get_val()); ogl_helper.fbprintf ("Light location: [%5.1f, %5.1f, %5.1f] " "Helix Location[%5.1f, %5.1f, %5.1f]\n", light_location.x, light_location.y, light_location.z, helix_location.x, helix_location.y, helix_location.z ); if ( !sp_geo_shade->pobject ) ogl_helper.fbprintf ("Programmable GPU API: %savailable. GPU Code: %s\n", ptr_glCreateShader ? "" : "not", sp_geo_shade->pobject ? "okay" : "problem"); const double time_now = time_wall_fp(); const bool blink_visible = int64_t(time_now*3) & 1; # define BLINK(txt,pad) ( blink_visible ? txt : pad ) ogl_helper.fbprintf ("Physics: %s ('a') Pause: %s ('p') Gravity: %s ('g') Grabbed: %s ('%s') Interpenetration: %s ('i') Debug: %d ('t')\n", opt_physics_method != GP_cuda ? BLINK(gpu_physics_method_str[opt_physics_method]," ") : gpu_physics_method_str[opt_physics_method], opt_pause ? BLINK("ON"," ") : "OFF", opt_gravity ? "ON" : "OFF", opt_end_fixed ? "YES" : "NO", opt_end_fixed ? "B" : "b", opt_interpen_method ? ( opt_interpen_method == 1 ? "PROB-1" : "PROB-2" ) : BLINK("OFF"," "), opt_test); ogl_helper.fbprintf("Spring Energy: %f\n", helix_spring_energy); const int win_width = ogl_helper.get_width(); const int win_height = ogl_helper.get_height(); const float aspect = float(win_width) / win_height; glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glLoadTransposeMatrixf(modelview); glMatrixMode(GL_PROJECTION); glLoadIdentity(); // Frustum: left, right, bottom, top, near, far glFrustum(-.8,.8,-.8/aspect,.8/aspect,1,5000); glEnable(GL_LIGHTING); const pColor white(1,1,1); const pColor red(1,0,0); const pColor blue(0,0,1); const pColor lsu_spirit_purple(0x580da6); const pColor lsu_spirit_gold(0xf9b237); glEnable(GL_LIGHT0); glLightfv(GL_LIGHT0, GL_POSITION, light_location); glLightfv(GL_LIGHT0, GL_DIFFUSE, white * opt_light_intensity); // Set lighting parameters for when shader is not used, as for the // big triangle. // glEnable(GL_COLOR_MATERIAL); glColorMaterial(GL_FRONT_AND_BACK,GL_AMBIENT_AND_DIFFUSE); glLightModeli(GL_LIGHT_MODEL_TWO_SIDE,1); pError_Check(); // Set parameters that apply to a texture (texture_id_syllabus). // glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR ); glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR ); // Set parameter for texture unit. // glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); sp_fixed->use(); /// /// Paint Single Triangle. /// pColor color_tri(0x7815b6); // Red, Green, Blue glColor3fv( color_tri ); glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D,texture_id_syllabus); // Indicate type of primitive. // glBegin(GL_TRIANGLES); // Specify vertices for a triangle. // pCoor p1( 9.5, -5, -1.2 ); pCoor p2( 0, 5, -3 ); pCoor p3( 9, 6, -7 ); pNorm triangle_normal = cross(p3,p2,p1); // Specify normal and vertex using course-defined objects pCoor and // pNorm. OpenGL sees these as pointers to floats. glNormal3fv(triangle_normal); glTexCoord2f(0.95,1.0); glVertex3fv(p1); glTexCoord2f(0.00,0.1); glVertex3fv(p2); glTexCoord2f(0.90,0.0); glVertex3fv(p3); glEnd(); glDisable(GL_TEXTURE_2D); /// /// Construct a Helix /// if ( coords_stale ) { // Recompute helix coordinates, etc. coords_stale = false; cuda_constants_stale = true; buffer_objects_stale = true; // Reset existing storage. helix_coords.reset(); helix_indices.reset(); wire_surface_indices.reset(); wire_segments.reset(); // Number of times helix wraps around. const int revolutions_per_helix = 8; const int segments_per_helix = revolutions_per_helix * seg_per_helix_revolution; const double delta_eta = 2 * M_PI / seg_per_helix_revolution; const double delta_y = 4 * wire_radius / seg_per_helix_revolution; int wire_surface_idx = 0; pVect vZero(0,0,0); const float helix_seg_hlength_xy = helix_radius * tan(0.5 * delta_eta); helix_seg_hlength = sqrtf( helix_seg_hlength_xy * helix_seg_hlength_xy + 0.25 * delta_y * delta_y ); const double angle_seg = asinf(delta_y /( 2 * helix_seg_hlength )); helix_seg_hlength_inv = 1 / helix_seg_hlength; pQuat tilt_up_seg(pVect(0,0,1),angle_seg); pQuat tilt_up_inv(pVect(0,0,1),-angle_seg); pQuat rot(pVect(0,-1,0),delta_eta); helix_rn_trans = tilt_up_inv * rot * tilt_up_seg; const float wire_rad_sq = wire_radius * wire_radius; helix_seg_mass = opt_helix_density * M_PI * wire_rad_sq * 2 * helix_seg_hlength; helix_seg_mass_inv = 1 / helix_seg_mass; helix_seg_ma_axis = helix_seg_mass * wire_rad_sq / 2; helix_seg_ma_perp_axis = helix_seg_mass * ( 4 * helix_seg_hlength * helix_seg_hlength + 3 * wire_rad_sq ) / 12; for ( int i = 0; i < segments_per_helix; i++ ) { const bool last_i_iteration = i + 1 == segments_per_helix; const double eta = i * delta_eta; pCoor p0( helix_radius * cos(eta), i * delta_y, helix_radius * sin(eta)); // Initialize a helix (wire) segment, used for physical simulation. // Wire_Segment* const ws = wire_segments.pushi(); ws->position = p0; ws->velocity = vZero; ws->omega = vZero; // Rotation rate. pQuat rot(pVect(0,-1,0),eta + M_PI / 2); // Rotate cylinder coordinates to world coordinates. ws->orientation = rot * tilt_up_seg; helix_coords += p0; for ( int j = 0; j < seg_per_wire_revolution; j++ ) { const int idx = wire_surface_idx++; helix_indices += i; helix_indices += j; if ( last_i_iteration ) continue; // Insert indices for triangle with one vertex on eta. wire_surface_indices += idx; // This vertex. wire_surface_indices += idx + seg_per_wire_revolution; } } wire_surface_indices_size = wire_surface_indices.occ(); helix_coords_size = helix_coords.occ(); helix_indices_size = helix_indices.occ(); helix_end_ws = &wire_segments.peek(); } // Update data on GPU if necessary. // if ( cuda_constants_stale ) { cuda_constants_stale = false; const int phys_helix_segments = wire_segments.occ(); // Allocate storage and write GPU variables with addresses of storage. // helix_position.realloc(phys_helix_segments); helix_position.ptrs_to_cuda("helix_position"); helix_velocity.realloc(phys_helix_segments); helix_velocity.ptrs_to_cuda("helix_velocity"); helix_omega.realloc(phys_helix_segments); helix_omega.ptrs_to_cuda("helix_omega"); helix_orientation.realloc(phys_helix_segments); helix_orientation.ptrs_to_cuda("helix_orientation"); // Copy data from our CPU-friendly wire_segments array of // structures to GPU-friendly individual arrays. Note that this // loop copies data from one part of CPU memory to // another. Since the ultimate destination this extra CPU->CPU // copying is wasteful, but that's acceptable because it only // needs to be done rarely, at start up and when the user // changes a variable. // for ( int i=0; i<phys_helix_segments; i++ ) { Wire_Segment* const ws = &wire_segments[i]; helix_position[i] = pVect(ws->position); helix_velocity[i] = ws->velocity; helix_orientation[i] = ws->orientation; helix_omega[i] = ws->omega; } // Copy data from CPU to GPU. // helix_position.to_cuda(); helix_velocity.to_cuda(); helix_orientation.to_cuda(); helix_omega.to_cuda(); // Write scalar variables to a structure, and then // send structure to GPU. // Helix_Info hi; const float delta_t_mass_inv = delta_t * helix_seg_mass_inv; const float delta_t_ma_axis = delta_t / helix_seg_ma_axis; const float delta_t_ma_perp_axis = delta_t / helix_seg_ma_perp_axis; #define SET(m) hi.m = m; SET(opt_gravity); SET(opt_test); SET(opt_end_fixed); SET(opt_interpen_method); SET(opt_spring_constant); SET(delta_t); SET(delta_t_mass_inv); SET(delta_t_ma_axis); SET(delta_t_ma_perp_axis); SET(phys_helix_segments); SET(wire_radius); SET(helix_seg_hlength); SET(helix_seg_mass_inv); SET(gravity_accel); SET(helix_rn_trans); #undef SET TO_DEV(hi); } // If necessary, update data in buffer objects. if ( buffer_objects_stale ) { buffer_objects_stale = false; // Generate buffer id (name), if necessary. // glGenBuffers(1,&helix_indices_bo); glGenBuffers(1,&helix_coords_bo); glGenBuffers(1,&helix_u_bo); glGenBuffers(1,&helix_v_bo); glGenBuffers(1,&wire_surface_indices_bo); if ( helix_u ) { free(helix_u); free(helix_v); } helix_u = (pCoor*) malloc( helix_coords_size * sizeof(helix_u[0]) ); helix_v = (pCoor*) malloc( helix_coords_size * sizeof(helix_v[0]) ); glBindBuffer(GL_ARRAY_BUFFER, helix_coords_bo); glBufferData (GL_ARRAY_BUFFER, helix_coords_size*4*sizeof(helix_coords[0]), helix_coords.get_storage(), GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, helix_indices_bo); glBufferData (GL_ARRAY_BUFFER, 2 * helix_indices_size * sizeof(helix_indices[0]), helix_indices.get_storage(), GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, wire_surface_indices_bo); glBufferData (GL_ARRAY_BUFFER, wire_surface_indices_size*sizeof(wire_surface_indices[0]), wire_surface_indices.get_storage(),GL_STATIC_DRAW); // Tell GL that subsequent array pointers refer to host storage. // glBindBuffer(GL_ARRAY_BUFFER, 0); pError_Check(); } // Run the physical simulation if not paused. // if ( !opt_pause ) { const int phys_helix_segments = wire_segments.occ(); const double time_now = time_wall_fp(); // Use a small block size under the assumption that // phys_helix_segments is small. const int block_size = 32; const int grid_size = ( phys_helix_segments + block_size - 1 ) / block_size; if ( opt_physics_method == GP_cuda ) CE(cudaEventRecord(frame_start_ce,0)); // Evolve simulated state in time steps of duration delta_t until // simulated time reaches current time. // for ( int iter_count = 0; world_time < time_now && iter_count < 1000; iter_count++ ) { if ( opt_physics_method == GP_cuda ) { time_step_launch(grid_size,block_size); /// SOLUTION if ( opt_interpen_method == 2 ) time_step_intersect_launch(phys_helix_segments,256); time_step_update_pos_launch(grid_size,block_size); } else time_step_cpu(); world_time += delta_t; } if ( opt_physics_method == GP_cuda ) { // Collect amount of time CUDa took to compute this frame. // CE(cudaEventRecord(frame_stop_ce,0)); CE(cudaEventSynchronize(frame_stop_ce)); float cuda_time = -1.1; CE(cudaEventElapsedTime(&cuda_time,frame_start_ce,frame_stop_ce)); frame_timer.cuda_frame_time_set(cuda_time); // Copy data back to CPU. Some or all of this copying can be // eliminated. As things are currently written, there is no // need to copy back the velocity and orientation unless // simulation is switching to CPU physics. If the same GPU // were used for physics and graphics, there would also be // no need to copy back the position and orientation. (A // CUDA global memory can be mapped to an OpenGL buffer // object.) // helix_position.from_cuda(); helix_velocity.from_cuda(); helix_orientation.from_cuda(); helix_omega.from_cuda(); for ( int i=0; i<phys_helix_segments; i++ ) { Wire_Segment* const ws = &wire_segments[i]; ws->position = helix_position[i]; ws->velocity = helix_velocity[i]; ws->orientation = helix_orientation[i]; ws->omega = helix_omega[i]; } } // Compute vectors used for drawing wire, and copy helix coordinate. // for ( int i=0; i<phys_helix_segments; i++ ) { Wire_Segment* const ws = &wire_segments[i]; // Copy helix coordinates to another array before sending // back to GPU. This step could easily be eliminated since // the coordinates are already linearized in helix_position. // helix_coords[i] = ws->position; // Compute vectors used for drawing wire. // pMatrix_Rotation c_rot(ws->orientation); helix_u[i] = c_rot * pVect(0,0,1); helix_v[i] = c_rot * pVect(0,1,0); } // Send data back to GPU. glBindBuffer(GL_ARRAY_BUFFER, helix_coords_bo); glBufferData (GL_ARRAY_BUFFER, helix_coords_size*4*sizeof(helix_coords[0]), helix_coords.get_storage(), GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, helix_u_bo); glBufferData (GL_ARRAY_BUFFER, helix_coords_size*sizeof(helix_u[0]), helix_u, GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, helix_v_bo); glBufferData (GL_ARRAY_BUFFER, helix_coords_size*sizeof(helix_v[0]), helix_v, GL_STATIC_DRAW); } switch ( opt_shader ){ case SP_Fixed: break; case SP_Geo_Shade1: sp_geo_shade->use(); break; default: ASSERTS( false ); } /// /// Paint a Helix /// glBindBufferBase(GL_SHADER_STORAGE_BUFFER,SB_COORD,helix_coords_bo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,SB_U,helix_u_bo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,SB_V,helix_v_bo); glUniform1f(UNIF_IDX_WIRE_RADIUS,wire_radius); GE(); glMatrixMode(GL_MODELVIEW); glPushMatrix(); glTranslatef(helix_location.x,helix_location.y,helix_location.z); glRotatef(60,0,1,0); // Specify color. Since it's not an array the same color // will be used for all vertices, which is what we want. // If we wanted to vary vertex colors we could have created // and used a color array. // glColor3fv(lsu_spirit_gold); glBindBuffer(GL_ARRAY_BUFFER, helix_indices_bo); glEnableVertexAttribArray(ATTR_IDX_HELIX_INDICES); glVertexAttribIPointer (ATTR_IDX_HELIX_INDICES, 2, // Two components (as in a 2-element vector). GL_INT, 0, // Tightly packed. 0); glBindBuffer(GL_ARRAY_BUFFER, 0); // Avoid surprises. glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, wire_surface_indices_bo); glDrawElements (GL_TRIANGLE_STRIP, wire_surface_indices_size - 4 * seg_per_wire_revolution, GL_UNSIGNED_INT, (const GLvoid*) ( 2 * seg_per_wire_revolution * sizeof(GL_UNSIGNED_INT))); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glDisableClientState(GL_NORMAL_ARRAY); glDisableClientState(GL_VERTEX_ARRAY); glDisableVertexAttribArray(ATTR_IDX_HELIX_INDICES); glPopMatrix(); sp_fixed->use(); // Render Marker for Light Source // insert_tetrahedron(light_location,0.5); pError_Check(); glColor3f(0.5,1,0.5); glDisable(GL_LIGHTING); glDisable(GL_DEPTH_TEST); frame_timer.frame_end(); glutSwapBuffers(); } void World::time_step_cpu() { pVect vZero(0,0,0); pVect gravity_force = helix_seg_mass_inv * gravity_accel; helix_spring_energy = 0; const int phys_helix_segments = wire_segments.occ(); /// Initialize wire segment variables for time step. // for ( int i=0; i<phys_helix_segments; i++ ) { Wire_Segment* const cseg = &wire_segments[i]; pMatrix_Rotation c_rot(cseg->orientation); // Compute world-space vectors of segments local coordinate space. cseg->u = c_rot * pVect(0,0,1); // Segment's +x direction. cseg->v = c_rot * pVect(0,1,0); // Etc. cseg->ctr_to_right_dir = c_rot * pVect(1,0,0); pQuat cn_rot_q = cseg->orientation * helix_rn_trans; pMatrix_Rotation cn_rot(cn_rot_q); cseg->nu = cn_rot * pVect(0,0,1); cseg->nv = cn_rot * pVect(0,1,0); pVect ctr_to_right = helix_seg_hlength * cseg->ctr_to_right_dir; // Compute location of ends of cylinder. cseg->pos_left = cseg->position - ctr_to_right; cseg->pos_right = cseg->position + ctr_to_right; cseg->force = opt_gravity ? gravity_force : vZero; cseg->torque = vZero; } /// Apply forces due to helix segment to the "right" (+x in local space). // for ( int i=0; i<phys_helix_segments-1; i++ ) { Wire_Segment* const cseg = &wire_segments[i]; // Us Wire_Segment* const rseg = &wire_segments[i+1]; // Our right neighbor. // Find distance between us and right neighbor at three points // on cylinder surface. We hope that the compiler unrolls this // loop to avoid computing sin and cos every time. const int pieces = 3; const float delta_theta = 2 * M_PI / pieces; for ( int j=0; j<pieces; j++ ) { const float theta = delta_theta * j; pCoor c_pt = cseg->pos_right + cseg->nu * cosf(theta) + cseg->nv * sinf(theta); pCoor r_pt = rseg->pos_left + rseg->u * cosf(theta) + rseg->v * sinf(theta); pNorm dist(c_pt,r_pt); const float force_mag = dist.magnitude * opt_spring_constant; pCoor mid_pt = 0.5 * ( c_pt + r_pt ); helix_apply_force_at(cseg,mid_pt,dist,force_mag); helix_apply_force_at(rseg,mid_pt,dist,-force_mag); } } const float delta_t_mass_inv = delta_t * helix_seg_mass_inv; const float delta_t_ma_axis = delta_t / helix_seg_ma_axis; const float delta_t_ma_perp_axis = delta_t / helix_seg_ma_perp_axis; /// Quick and Dirty and Slow Interpenetration Routine // if ( opt_interpen_method ) { // Rule out interpenetration between segments that are closer // together than MIN_IDX_DIST. In other words, assume that // segment 14 and segment 15 cannot interpenetrate each other // (but of course they are in contact in a normal way). // const int min_idx_dist = 0.999 + wire_radius / helix_seg_hlength; const float four_wire_radius_sq = 4 * wire_radius * wire_radius; // Use a brute force, O(n^2), search of all possible pairs of // segments. // for ( int i=1; i<phys_helix_segments; i++ ) for ( int j=i+min_idx_dist; j<phys_helix_segments; j++ ) { Wire_Segment* const aseg = &wire_segments[i]; Wire_Segment* const bseg = &wire_segments[j]; pNorm dist(aseg->position,bseg->position); // Use bounding sphere to quickly rule out contact. // if ( dist.mag_sq > four_wire_radius_sq ) continue; // As a crude approximation, compute depth of // interpenetration using bounding sphere. // const float pen_depth = 2 * wire_radius - dist.magnitude; pVect sep_force = pen_depth * opt_spring_constant * dist; aseg->force -= sep_force; bseg->force += sep_force; } } /// Using force and torque, update velocity, omega, position, and orientation. // for ( int i=1; i<phys_helix_segments; i++ ) { Wire_Segment* const cseg = &wire_segments[i]; cseg->velocity *= 0.9999; cseg->omega *= 0.9999; cseg->velocity += delta_t_mass_inv * cseg->force; if ( opt_end_fixed && i + 1 == phys_helix_segments ) cseg->velocity = vZero; cseg->position += delta_t * cseg->velocity; const float torque_axial_mag = dot( cseg->torque, cseg->ctr_to_right_dir ); pVect torque_axial = torque_axial_mag * cseg->ctr_to_right_dir; pVect do_axial = delta_t_ma_axis * torque_axial; pVect torque_other = cseg->torque - torque_axial; pVect do_other = delta_t_ma_perp_axis * torque_other; cseg->omega += do_axial + do_other; pNorm axis(cseg->omega); cseg->orientation = pQuat(axis,delta_t * axis.magnitude) * cseg->orientation; } } void World::helix_apply_force_at (Wire_Segment *s, pCoor pos, pVect dir, float magnitude) { s->force += magnitude * dir; helix_spring_energy += fabs(magnitude); pVect arm(s->position,pos); pVect axis = cross( arm, dir ); pVect amt = magnitude * axis; helix_spring_energy += amt.mag(); s->torque += amt; } pVect World::helix_get_vel_at(Wire_Segment *s, pCoor pos) { pVect arm(s->position,pos); pVect vel_rot = cross( s->omega, arm ); return vel_rot + s->velocity; } void World::cuda_init() { cuda_constants_stale = true; // Get information about GPU and its ability to run CUDA. // int device_count; cudaGetDeviceCount(&device_count); // Get number of GPUs. ASSERTS( device_count ); /// Print information about the available GPUs. // for ( int dev = 0; dev < device_count; dev ++ ) { CE(cudaGetDeviceProperties(&cuda_prop,dev)); CE(cudaGLSetGLDevice(dev)); printf ("GPU %d: %s @ %.2f GHz WITH %d MiB GLOBAL MEM\n", dev, cuda_prop.name, cuda_prop.clockRate/1e6, int(cuda_prop.totalGlobalMem >> 20)); printf ("GPU %d: CAP: %d.%d MP: %2d TH/WP: %3d TH/BL: %4d\n", dev, cuda_prop.major, cuda_prop.minor, cuda_prop.multiProcessorCount, cuda_prop.warpSize, cuda_prop.maxThreadsPerBlock ); printf ("GPU %d: SHARED: %5d CONST: %5d # REGS: %5d\n", dev, int(cuda_prop.sharedMemPerBlock), int(cuda_prop.totalConstMem), cuda_prop.regsPerBlock ); } const int dev = 0; printf("Using GPU %d\n",dev); CE(cudaSetDevice(dev)); // Prepare events used for timing. // CE(cudaEventCreate(&frame_start_ce)); CE(cudaEventCreate(&frame_stop_ce)); cuda_setup(&cfa_helix); block_size_max = cfa_helix.maxThreadsPerBlock; set_min(block_size_max,cuda_prop.maxThreadsPerBlock); // Print information about time_step routine. // printf("CUDA Routine Resource Usage:\n"); printf(" pass_helix: %6zd shared, %zd const, %zd loc, %d regs; " "%d max thr\n", cfa_helix.sharedSizeBytes, cfa_helix.constSizeBytes, cfa_helix.localSizeBytes, cfa_helix.numRegs, cfa_helix.maxThreadsPerBlock); } void World::cb_keyboard() { if ( !ogl_helper.keyboard_key ) return; pVect adjustment(0,0,0); pVect user_rot_axis(0,0,0); const float move_amt = 0.4; const float wire_radius_prev = wire_radius; const float helix_radius_prev = helix_radius; switch ( ogl_helper.keyboard_key ) { case FB_KEY_LEFT: adjustment.x = -move_amt; break; case FB_KEY_RIGHT: adjustment.x = move_amt; break; case FB_KEY_PAGE_UP: adjustment.y = move_amt; break; case FB_KEY_PAGE_DOWN: adjustment.y = -move_amt; break; case FB_KEY_DOWN: adjustment.z = move_amt; break; case FB_KEY_UP: adjustment.z = -move_amt; break; case FB_KEY_DELETE: user_rot_axis.y = 1; break; case FB_KEY_INSERT: user_rot_axis.y = -1; break; case FB_KEY_HOME: user_rot_axis.x = 1; break; case FB_KEY_END: user_rot_axis.x = -1; break; case 'a': opt_physics_method++; if ( opt_physics_method > GP_cuda ) opt_physics_method = GP_cpu; if ( opt_physics_method == GP_cuda ) cuda_constants_stale = true; break; case 'b': opt_end_fixed = true; opt_move_item = MI_Ball; cuda_constants_stale = true; break; case 'B': opt_end_fixed = false; cuda_constants_stale = true; break; case 'e': case 'E': opt_move_item = MI_Eye; break; case 'l': case 'L': opt_move_item = MI_Light; break; case 'g': case 'G': opt_gravity = !opt_gravity; cuda_constants_stale = true; break; case 'i': case 'I': opt_interpen_method++; if ( opt_interpen_method > 2 ) opt_interpen_method = 0; cuda_constants_stale = true; break; case 't': case 'T': opt_test = !opt_test; break; case 'p': case 'P': opt_pause = !opt_pause; if ( !opt_pause ) world_time = time_wall_fp(); break; case 9: variable_control.switch_var_right(); break; case 96: variable_control.switch_var_left(); break; // `, until S-TAB works. case '-':case '_': variable_control.adjust_lower(); cuda_constants_stale = true; break; case '+':case '=': variable_control.adjust_higher(); cuda_constants_stale = true; break; default: printf("Unknown key, %d\n",ogl_helper.keyboard_key); break; } if ( wire_radius_prev != wire_radius || helix_radius_prev != helix_radius ) coords_stale = true; // Update eye_direction based on keyboard command. // if ( user_rot_axis.x || user_rot_axis.y ) { pMatrix_Rotation rotall(eye_direction,pVect(0,0,-1)); user_rot_axis *= invert(rotall); eye_direction *= pMatrix_Rotation(user_rot_axis, M_PI * 0.03); modelview_update(); } // Update eye_location based on keyboard command. // if ( adjustment.x || adjustment.y || adjustment.z ) { const double angle = fabs(eye_direction.y) > 0.99 ? 0 : atan2(eye_direction.x,-eye_direction.z); pMatrix_Rotation rotall(pVect(0,1,0),-angle); adjustment *= rotall; switch ( opt_move_item ){ case MI_Light: light_location += adjustment; break; case MI_Eye: eye_location += adjustment; break; case MI_Ball: cuda_constants_stale = true; if ( opt_end_fixed ) helix_end_ws->position += adjustment; else helix_location += adjustment; break; default: break; } modelview_update(); } } int main(int argv, char **argc) { pOpenGL_Helper popengl_helper(argv,argc); World world(popengl_helper); // Specify default frame update rate. // // Default rate used if API won't allow updating on each // display device frame. // popengl_helper.rate_set(30); // Start // popengl_helper.display_cb_set(world.render_w,&world); }