/// LSU EE 7700-1 (Sp 2009), Graphics Processors
//
/// Balloon Simulation
// $Id:$
/// Purpose
//
// Demonstrate use of gpu for physics.
/// What Code Does
// The code in this file simulates a /balloon/: an elastic /surface/
// that encloses a /gas/. Outside the balloon is /air/, and there
// is a /platform/ for the balloon to bounce on. Many physical
// parameters can be varied affecting the balloon bouncyness and
// buoyancy.
// By default the balloon simulation (physics) is performed on the GPU
// using CUDA using a one-pass algorithm, but the user can switch
// between a second CUDA algorithm, using the OpenGL API for physics,
// or having the CPU do the physics.
// The platform consists of tiles, some are purple-tinted mirrors
// (showing a reflection of the balloon), the others show the course
// syllabus. The balloon can cast a shadow on the tiles.
/// Keyboard Commands
//
/// Object (Eye, Light, Balloon) Location or Push
// Arrows, Page Up, Page Down
// Will move object or push balloon, depending on mode:
// 'e': Move eye.
// 'l': Move light.
// 'b': Move balloon. (Change position but not velocity.)
// 'B': Push balloon. (Add velocity.)
//
/// Eye Direction
// Home, End, Delete, Insert
// Turn the eye direction.
// Home should rotate eye direction up, End should rotate eye
// down, Delete should rotate eye left, Insert should rotate eye
// right. The eye direction vector is displayed in the upper left.
/// Simulation Options
// (Also see variables below.)
//
// 'p' Pause simulation. (Press again to resume.)
// 's' Stop balloon (but not vibration).
// 'S' Freeze balloon. (Set velocity of all vertices to zero.)
// 'a' Cycle between different physics algorithms and processors (GPU/CPU).
// 'A' Switch between CPU and GPU physics.
// 'g' Turn gravity on and off.
// 'n' Switch between textured and striped balloon surface.
// 'F12' Write screenshot to file balloon.png.
/// Variables
// Selected program variables can be modified using the keyboard.
// Use "Tab" to cycle through the variable to be modified, the
// name of the variable is displayed next to "VAR" on the bottom
// line of green text.
// 'Tab' Cycle to next variable.
// '`' Cycle to previous variable.
// '+' Increase variable value.
// '-' Decrease variable value.
//
// VAR Light Intensity - The light intensity.
// VAR Gas Amount - Amount of gas in balloon.
// VAR Gravity - Gravitational acceleration. (Turn on/off using 'g'.)
// VAR Air Resistance - Viscosity of air.
// VAR Gas Particle Mass - Initially matches air mass.
// VAR Spring Constant - Warning: watch Oversample as this is increased.
// VAR Repulsion Constant - Determines stiffness of surface.
// VAR Surface Mass - Warning: watch Oversample as this is reduced.
/// Bugs and Incomplete Features
// Code is only sparsely commented.
// Some code in this file should be moved to other files.
// The reflection and shadow are incorrect when the balloon
// is below the platform.
// At high spring constant values the simulation leaks momentum.
// The repulsion used to model stiffness should be augmented or replaced
// with forces based on angle (not just distance).
/// To Do
// Detect and prevent interpenetration.
// Use something like Verlet integration to update position.
// Provide presets of physical parameters, for example, to simulate a
// hot-air balloon, a partially deflated basketball, etc.
#define GL_GLEXT_PROTOTYPES
#define GLX_GLXEXT_PROTOTYPES
#include <GL/gl.h>
#include <GL/glext.h>
#include <GL/glx.h>
#include <GL/glxext.h>
#include <GL/glu.h>
#include <GL/freeglut.h>
#include <Magick++.h>
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include "../opengl/util.h"
#include "../opengl/glextfuncs.h"
#include "../opengl/coord.h"
#include "../opengl/shader.h"
#include "../opengl/pstring.h"
#include "../opengl/misc.h"
#include "balloon.cuh"
///
/// CUDA and OpenGL Support
///
/// CUDA API Error-Checking Wrapper
///
#define CE(call) \
{ \
const cudaError_t rv = call; \
if ( rv != cudaSuccess ) \
{ \
pStringF msg("CUDA error %d, %s\n",rv,cudaGetErrorString(rv)); \
pError_Msg(msg.s); \
} \
}
void vec_set(float3& a, pCoor b) {a.x = b.x; a.y = b.y; a.z = b.z;}
void vec_sets(pCoor& a, float3 b) {a.x = b.x; a.y = b.y; a.z = b.z; a.w=1;}
void vec_sets(pVect& a, float3 b) {a.x = b.x; a.y = b.y; a.z = b.z; }
///
/// Class for managing an OpenGL ARRAY_BUFFER
///
template <typename T>
class pBuffer_Object {
public:
pBuffer_Object(){ data = NULL; init(); }
~pBuffer_Object()
{
if ( data ) free(data);
glDeleteBuffers(created,bids);
}
private:
void init()
{
btarget = GL_ARRAY_BUFFER;
glGenBuffers(2, bids);
current = 0;
bid = bids[current];
created = 0;
pError_Check();
usage_hint = GL_DYNAMIC_COPY;
}
public:
T* alloc(int elements_p, GLenum hint = GL_DYNAMIC_COPY)
{
usage_hint = hint;
if ( data ) pError_Msg("Double allocation of pBuffer_Object.");
elements = elements_p;
chars = elements * sizeof(T);
data = new T[elements];
created = 1;
alloc_gpu_buffer();
glBindBuffer(btarget,0);
return data;
}
void take(PStack<T>& stack, GLenum hint = GL_DYNAMIC_COPY,
GLenum default_target = GL_ARRAY_BUFFER)
{
usage_hint = hint;
btarget = default_target;
if ( data ) pError_Msg("Double allocation of pBuffer_Object.");
elements = stack.occ();
chars = elements * sizeof(T);
data = stack.take_storage();
created = 1;
alloc_gpu_buffer();
glBindBuffer(btarget,0);
}
void prepare_two_buffers()
{
ASSERTS( created == 1 );
created = 2; bid_swap(); alloc_gpu_buffer(); bid_swap();
}
private:
void alloc_gpu_buffer()
{
bind();
glBufferData(btarget,chars,NULL,usage_hint);
pError_Check();
}
public:
void to_gpu()
{
bind();
glBufferData(btarget, chars, data, usage_hint);
pError_Check();
}
void from_gpu()
{
bind();
T* const from_data = (T*)glMapBuffer(GL_ARRAY_BUFFER,GL_READ_ONLY);
pError_Check();
memcpy(data,from_data,chars);
glUnmapBuffer(btarget);
glBindBuffer(btarget,0);
}
void bind(GLenum target){ glBindBuffer(target,bid); }
void bind(){ glBindBuffer(btarget,bid); }
GLuint bid_read() const { return bids[current]; }
GLuint bid_write()
{
bid_swap(); alloc_gpu_buffer(); bid_swap();
return bids[1-current];
}
GLuint bid_fresh()
{
alloc_gpu_buffer();
return bid;
}
void bid_swap()
{
current = 1 - current;
bid = bids[current];
}
T& operator [] (int idx) { return data[idx]; }
GLuint bids[2];
GLuint bid;
GLenum usage_hint;
GLenum btarget;
int created, current;
T *data;
int elements, chars;
};
///
/// Class for managing CUDA device memory.
///
template <typename T>
class pCUDA_Memory {
public:
pCUDA_Memory()
{
data = NULL; dev_addr[0] = dev_addr[1] = NULL; current = 0; bid = 0;
bo_ptr = NULL;
}
~pCUDA_Memory()
{
if ( data ) free(data);
if ( bid )
{
CE(cudaGLUnmapBufferObject(bid));
CE(cudaGLUnregisterBufferObject(bid));
glDeleteBuffers(1,&bid);
}
}
T* alloc(int elements_p)
{
if ( data ) pError_Msg("Double allocation of pCUDA_Memory.");
elements = elements_p;
chars = elements * sizeof(T);
data = new T[elements];
return data;
}
void take(PStack<T>& stack)
{
if ( data ) pError_Msg("Double allocation of pCUDA_Memory.");
elements = stack.occ();
chars = elements * sizeof(T);
data = stack.take_storage();
}
T& operator [] (int idx) const { return data[idx]; }
private:
void alloc_maybe() { if ( !dev_addr[current] ) alloc_gpu_buffer(); }
void alloc_gpu_buffer()
{
ASSERTS( !dev_addr[current] );
CE(cudaMalloc(&dev_addr[current],chars));
}
void alloc_gl_buffer()
{
if ( bid ) return;
glGenBuffers(1,&bid);
glBindBuffer(GL_ARRAY_BUFFER,bid);
glBufferData(GL_ARRAY_BUFFER,chars,NULL,GL_DYNAMIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER,0);
CE(cudaGLRegisterBufferObject(bid));
}
public:
T* get_dev_addr() { alloc_maybe(); return (T*)dev_addr[current];}
T* get_dev_addr_read() { return get_dev_addr(); }
T* get_dev_addr_write()
{
if ( !dev_addr[1-current] ) { swap(); alloc_gpu_buffer(); swap(); }
return (T*)dev_addr[1-current];
}
void to_cuda()
{
if ( !dev_addr[current] ) alloc_gpu_buffer();
CE(cudaMemcpy(dev_addr[current], data, chars, cudaMemcpyHostToDevice));
}
void from_cuda()
{
CE(cudaMemcpy(data, dev_addr[current], chars, cudaMemcpyDeviceToHost));
}
void cuda_to_gl()
{
alloc_gl_buffer();
// Due to a bug in CUDA 2.1 this is slower than copying through host.
CE(cudaGLMapBufferObject(&bo_ptr,bid));
CE(cudaMemcpy(bo_ptr, dev_addr[current], chars, cudaMemcpyDeviceToDevice));
CE(cudaGLUnmapBufferObject(bid));
}
void swap() { current = 1 - current; }
void set_primary() { current = 0; }
// Stuff below should be private to avoid abuse.
void *dev_addr[2], *bo_ptr;
GLuint bid;
int current;
T *data;
int elements, chars;
};
///
/// Ad-Hoc Class for Reading Images
///
using namespace Magick;
class P_Image_Read
{
public:
P_Image_Read(const char *path, int transp):
image(path),image_loaded(false),data(NULL)
{
width = image.columns();
height = image.rows();
size = width * height;
if ( !width || !height ) return;
if ( transp == 255 )
image.transparent(Color("White"));
pp = image.getPixels(0,0,width,height);
for ( int i = 0; i < size; i++ ) pp[i].opacity = MaxRGB - pp[i].opacity;
gl_fmt = GL_BGRA;
gl_type = sizeof(PixelPacket) == 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
data = (unsigned char*) pp;
image_loaded = true;
};
void color_invert()
{
for ( int i = 0; i < size; i++ )
{
PixelPacket& p = pp[i];
const int sum = p.red + p.blue + p.green;
p.opacity = (typeof p.opacity)( MaxRGB - sum * 0.3333333 );
p.red = p.blue = p.green = MaxRGB;
}
}
Image image;
PixelPacket *pp;
bool image_loaded;
int width, height, maxval, size;
unsigned char *data;
int gl_fmt;
int gl_type;
private:
};
///
/// Create and initialize texture object using image file.
///
GLuint
pBuild_Texture_File
(const char *name, bool invert = false, int transp = 256 )
{
// Read image from file.
//
P_Image_Read image(name,transp);
if ( !image.image_loaded ) return 0;
// Invert colors. (E.g., to show text as white on black.)
//
if ( invert ) image.color_invert();
GLuint tid;
glGenTextures(1,&tid);
glBindTexture(GL_TEXTURE_2D,tid);
glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, 1);
// Load data into the texture object.
//
glTexImage2D
(GL_TEXTURE_2D,
0, // Level of Detail (0 is base).
GL_RGBA, // Internal format to be used for texture.
image.width, image.height,
0, // Border
image.gl_fmt, // GL_BGRA: Format of data read by this call.
image.gl_type, // GL_UNSIGNED_BYTE: Size of component.
(void*)image.data);
pError_Check();
return tid;
}
///
/// Main Data Structures
///
//
// class World: All data about scene.
// class Balloon: Data about a balloon.
//
// See also balloon.cuh
class World;
struct Balloon_Triangle {
int pi, qi, ri;
int pi_opp, qi_opp, ri_opp;
bool pi_less, qi_less, ri_less;
pColor color;
pVect normal;
float length_relaxed;
int block_number; // Used for preparing cuda data.
bool a_vtx(int v) const { return v == pi || v == qi || v == ri; }
int third_vtx(int v1, int v2) const
{ return ( v1 == pi && v2 == qi || v2 == pi && v1 == qi ? ri :
v1 == pi && v2 == ri || v2 == pi && v1 == ri ? qi :
v1 == qi && v2 == ri || v2 == qi && v1 == ri ? pi : -1 ); }
};
struct Balloon_Rep_Pair {
// Repulsion pairs.
int pi, qi;
};
struct Balloon_Vertex {
Balloon_Vertex(){ edge_out_count = edge_in_count = 0; }
// Constant Data (from time step to time step)
float mass_inv;
float mass;
int edge_out[7];
int edge_out_count;
PStack<int> triangles;
pCoor tex_coor;
// Data changed each time step.
pCoor pos;
pVect vel;
// Maybe generated and used in same time step.
pCoor pos_prev;
pVect force;
pVect force_spring;
pVect force_pressure;
pVect force_rep; // Repulsion.
pVect surface_normal;
// Only used during initialization.
int edge_in_count;
int edge_in[6];
double eta;
double theta;
int ring;
};
// Structural Data for OpenGL Physics, One per Triangle
//
struct GLP_Tri_Strc {
float pi, qi, ri; // Index of triangle's vertices.
float length_relaxed;
int pi_opp, qi_opp, ri_opp;
}; // 7 * 4 = 28 bytes
// Structural Data for OpenGL Physics, One per Vertex
//
struct GLP_Vtx_Strc {
float self_idx;
float left_idx; // Not used.
uint16_t neighbors[VTX_TRI_DEG_MAX]; // Index of vertex's triangles.
}; // 8 + 16 = 24 bytes. Yuck!
// GPU-Computed Balloon Data for Triangles
//
struct GLP_Tri_Data {
pCoor surface_normal; // Magnitude is area of incident triangles.
pCoor force_p;
pCoor force_q;
pCoor force_r;
};
// See also balloon.cuh.
// GPU-Computed Balloon Data for Vertices
//
struct GLP_Vtx_Data {
pCoor surface_normal; // Magnitude is area of incident triangles.
pCoor vel;
pCoor pos;
pCoor padding;
};
// See also balloon.cuh.
enum Data_Location { DL_CPU = 0x1, DL_GLP = 0x2, DL_CUDA = 0x4 };
enum GPU_Physics_Method
{ GP_cpu, GP_glp, GP_cuda_1_pass, GP_cuda_2_pass, GP_ENUM_SIZE };
const char* const gpu_physics_method_str[] =
{ "CPU", "OpenGL 2 Pass", "CUDA 1 Pass", "CUDA 2 Pass" };
class Balloon {
public:
Balloon(World& w):world(w)
{
cuda_initialized = false;
glp_initialized = false;
}
~Balloon(){ }
void init(pCoor center, double radius);
// Called each time user changes a configuration variable, such as gravity.
void update_for_config();
void init_glp();
void init_cuda();
void cuda_data_partition();
// Advance (time-step) simulated time.
//
void time_step_cpu(int steps);
void time_step_cpu_once();
void time_step_gpu(int steps); // Call appropriate time_step routine.
void time_step_glp(int steps); // Use OpenGL API for physics.
void time_step_cuda(int steps); // Use CUDA API for physics.
void gpu_data_to_cpu();
void glp_data_to_cpu();
void cuda_data_to_cpu();
void cpu_data_to_glp();
bool cpu_data_to_cuda(); // Return true if data transferred.
// User Interaction
//
void translate(pVect amt); // Instantly move balloon.
void push(pVect amt); // Instantly add velocity.
void stop() // Stop motion but not other motion.
{
pVect avg_vel = velocity_avg();
for ( int i=0; i<point_count; i++ ) points[i].vel -= avg_vel;
}
void freeze() // Stop all motion.
{
for ( int i=0; i<point_count; i++ ) points[i].vel = pVect(0,0,0);
}
pCoor centroid_compute()
{
pCoor point_sum(0,0,0);
for ( int i=0; i<point_count; i++ ) point_sum += points[i].pos;
point_sum.homogenize();
centroid = point_sum;
return centroid;
}
float pressure_air(float msl)
{
return opt_gravity ? exp( - 0.2 * air_particle_mass * msl ) : 1.0;
}
float pressure_gas(float msl, float factorp = 0)
{
const float factor = factorp ? factorp : gas_pressure_factor;
return opt_gravity ? factor * exp( - gas_m_over_temp * msl ) : factor;
}
void pressure_compute()
{
const float exp_air = pressure_air(centroid.y);
const float exp_gas = pressure_gas(centroid.y,1);
const float eff_volume = fabs( volume );
gas_pressure_factor = pressure_factor_coeff / eff_volume;
pressure = gas_pressure_factor * exp_gas / exp_air;
density_air =
( pressure_air(centroid.y - 0.5) - pressure_air(centroid.y + 0.5) )
/ opt_gravity_accel;
density_gas =
( pressure_gas(centroid.y - 0.5) - pressure_gas(centroid.y + 0.5) )
/ opt_gravity_accel;
}
pVect velocity_avg()
{
pVect vel_avg(0,0,0);
for ( int i=0; i<point_count; i++ ) vel_avg += points[i].vel;
vel_avg *= 1.0/point_count;
return vel_avg;
}
World& world;
// Structural Data
//
float radius; // Initial radius.
float nom_volume; // Volume based on initial radius
// Balloon Structure
//
PStack<Balloon_Vertex> points;
PStack<Balloon_Triangle> triangles;
PStack<Balloon_Rep_Pair> rep_pairs;
pBuffer_Object<GLuint> point_indices;
pBuffer_Object<float> tex_coords;
int point_count;
int tri_count;
int rep_pair_count;
int tethered_idx;
// Unchanging (or user set) Physical Constants
//
float spring_constant;
float rep_constant;
float air_resistance;
float surface_mass;
float gas_amount;
float gas_particle_mass;
float air_particle_mass;
float temperature;
float opt_gravity_accel;
float damping_v; // CPU and gpu. Higher is less damping.
float damping_factor; // CPU only code.
// User-Set Options (in addition to physical constants above).
//
bool opt_gravity; // If false, no gravity.
bool opt_damping; // Only used in cpu code. See also damping_v
bool opt_cpu_interleave; // When gpu physics on do 1 cpu time step / frame.
// Computed after each change to user-set physical quantity.
//
float temp_ratio; // Temperature ratio.
float gas_mass_per_vertex;
float pressure_factor_coeff;
float gas_pressure_factor;
double oversample; // Harmonic (approx) divided by time step delta t.
double tightness;
float damping_factor_per_step;
float point_mass;
float point_mass_inv;
// Computed each time step.
//
float volume;
float area;
pVect weight;
pCoor centroid;
float gas_m_over_temp; // Coefficient in pressure formula.
float pressure;
float density_air, density_gas;
// Computed but not yet correct.
//
double e_spring, e_kinetic;
double energy, e_zero;
GLuint texid_pse, texid_syl;
int cpu_iteration;
bool need_cpu_iteration;
bool length_relaxed_update;
int data_location;
GLuint data_bid;
int data_stride;
bool glp_initialized;
pBuffer_Object<GLP_Vtx_Data> glp_vtx_data;
pBuffer_Object<GLP_Tri_Data> glp_tri_data;
pBuffer_Object<GLP_Vtx_Strc> glp_vtx_strc;
pBuffer_Object<GLP_Tri_Strc> glp_tri_strc;
GLuint query_transform_feedback_id;
GLuint glp_vtx_data_tid;
GLuint glp_tri_data_tid;
GLuint framebuffer_id, renderbuffer_id;
pShader vs_plan_c;
GLint sat_indices, sat_volume, sat_pos, sat_vel;
GLint sun_constants_sc;
GLint sun_constants_gas, sun_constants_dt, sun_platform;
GLint svl_surface_normal, svl_force_or_v, svl_pos, svl_force_r;
GLint stx_data_vtx, stx_data_tri;
// CUDA Stuff
//
bool cuda_initialized;
bool cuda_constants_stale;
pCUDA_Memory<CUDA_Tri_Strc> cuda_tri_strc;
pCUDA_Memory<CUDA_Tri_Work_Strc> cuda_tri_work_strc;
pCUDA_Memory<CUDA_Tri_Data> cuda_tri_data;
pCUDA_Memory<CUDA_Vtx_Strc> cuda_vtx_strc;
pCUDA_Memory<CUDA_Vtx_Data> cuda_vtx_data;
pCUDA_Memory<float> cuda_tower_volumes;
pCUDA_Memory<float3> cuda_centroid_parts;
int tri_work_per_vtx;
int tri_work_per_vtx_lg;
dim3 Dg_tri, Db_tri, Dg_vtx, Db_vtx;
};
class World {
public:
World(pOpenGL_Helper &fb):ogl_helper(fb),balloon(*this){init();}
void init();
static void render_w(void *moi){ ((World*)moi)->render(); }
void render();
void cb_keyboard();
void modelview_update();
void shadow_update();
void shadow_transform_create(pMatrix& m, pCoor light);
pOpenGL_Helper& ogl_helper;
pVariable_Control variable_control;
pFrame_Timer frame_timer;
cudaEvent_t frame_start_ce, frame_stop_ce;
double world_time;
double delta_t; // Duration of time step.
// Tiled platform for balloon.
//
float platform_xmin, platform_xmax, platform_zmin, platform_zmax;
pBuffer_Object<pVect> platform_tile_coords;
pBuffer_Object<float> platform_tex_coords;
Balloon balloon;
pCoor light_location;
float opt_light_intensity;
enum { MI_Eye, MI_Light, MI_Balloon, MI_Balloon_V, MI_COUNT } opt_move_item;
bool opt_pause;
int opt_physics_method;
int opt_physics_method_last;
bool opt_surface_smooth;
pCoor eye_location;
pVect eye_direction;
pMatrix modelview;
pMatrix modelview_shadow;
pMatrix transform_mirror;
pShader vs_fixed;
};
void
World::init()
{
const double radius = 5;
pCoor center(13.7,12,-15.4);
frame_timer.work_unit_set("Steps / s");
world_time = 0;
delta_t = 1.0 / ( 32 * 30 );
balloon.data_location = DL_CPU;
balloon.data_bid = 0;
eye_location = pCoor(24.2,11.6,-38.7);
eye_direction = pVect(-0.42,-0.09,0.9);
opt_move_item = MI_Eye;
opt_light_intensity = 100.2;
opt_physics_method = GP_cuda_1_pass;
opt_surface_smooth = true;
platform_xmin = -40; platform_xmax = 40;
platform_zmin = -40; platform_zmax = 40;
light_location = pCoor(platform_xmax,platform_xmax,platform_zmin);
balloon.need_cpu_iteration = true;
balloon.length_relaxed_update = true;
balloon.damping_v = 0.1;
balloon.cpu_iteration = 0;
balloon.opt_gravity = true;
balloon.opt_damping = false;
balloon.opt_cpu_interleave = false;
balloon.damping_factor = 0.2;
balloon.spring_constant = 40.0;
balloon.rep_constant = 0.1;
balloon.air_resistance = 0.001;
balloon.gas_amount = 0;
balloon.surface_mass = 1;
balloon.e_zero = 0;
balloon.opt_gravity_accel = 9.8;
balloon.gas_particle_mass = 0.01;
balloon.air_particle_mass = 0.01;
balloon.temperature = 300;
opt_pause = false;
variable_control.insert(balloon.gas_amount,"Gas Amount");
variable_control.insert(balloon.opt_gravity_accel,"Gravity");
// variable_control.insert(balloon.temperature,"Temperature");
// variable_control.insert(balloon.damping_v,"Damping Factor");
variable_control.insert(balloon.air_resistance,"Air Resistance");
variable_control.insert(opt_light_intensity,"Light Intensity");
variable_control.insert(balloon.gas_particle_mass,"Gas Particle Mass");
variable_control.insert(balloon.spring_constant,"Spring Constant");
variable_control.insert(balloon.rep_constant,"Repulsion Constant");
variable_control.insert(balloon.surface_mass,"Surface Mass");
balloon.init(center,radius);
modelview_update();
const float tile_count = 19;
const float ep = 1.00001;
const float xdelta = ( platform_xmax - platform_xmin ) / tile_count * ep;
const float zdelta = ( platform_zmax - platform_zmin ) / tile_count * ep;
const float trmin = 0.05;
const float trmax = 0.7;
const float tsmin = 0;
const float tsmax = 0.4;
PStack<pVect> p_tile_coords;
PStack<pVect> p1_tile_coords;
PStack<float> p_tex_coords;
bool even = true;
for ( float x = platform_xmin; x < platform_xmax; x += xdelta )
for ( float z = platform_zmin; z < platform_zmax; z += zdelta )
{
PStack<pVect>& t_coords = even ? p_tile_coords : p1_tile_coords;
p_tex_coords += trmax; p_tex_coords += tsmax;
t_coords += pVect(x,-0.01,z);
p_tex_coords += trmax; p_tex_coords += tsmin;
t_coords += pVect(x,-0.01,z+zdelta);
p_tex_coords += trmin; p_tex_coords += tsmin;
t_coords += pVect(x+xdelta,-0.01,z+zdelta);
p_tex_coords += trmin; p_tex_coords += tsmax;
t_coords += pVect(x+xdelta,-0.01,z);
even = !even;
}
while ( pVect* const v = p1_tile_coords.iterate() ) p_tile_coords += *v;
platform_tile_coords.take(p_tile_coords);
platform_tile_coords.to_gpu();
platform_tex_coords.take(p_tex_coords);
platform_tex_coords.to_gpu();
}
void
Balloon::init(pCoor center, double r)
{
radius = r;
nom_volume = 4.0/3.0 * M_PI * r * r *r;
const int equator_points = 60;
const int slice_points_min = 6;
static const double two_pi = 2 * M_PI;
const double equator_interpoint_radians = two_pi / equator_points;
const double equator_interpoint = r * equator_interpoint_radians;
const double epsilon = 0.00001;
point_count = 0;
Balloon_Vertex* const pole_south = points.pushi();
pole_south->mass_inv = 1;
pole_south->theta = 10;
pole_south->pos = center + pVect(0,0,r);
pole_south->vel = pVect(0,0,0);
pole_south->ring = 0;
pole_south->eta = 0;
point_count++;
const double first_eta =
asin( double(slice_points_min) / equator_points );
const int rings =
int(0.5 + ( M_PI - 2 * first_eta ) / equator_interpoint_radians);
const double delta_eta = ( M_PI - 2 * first_eta ) / max(1,rings);
int ring_count = 0;
PStack<int> rings_first_idx;
rings_first_idx += 0;
for ( double eta = first_eta + epsilon;
eta <= M_PI-first_eta;
eta += delta_eta - epsilon )
{
ring_count++;
const double z = r * cos(eta);
const double slice_r = r * sin(eta);
const int slice_points = int( two_pi * slice_r / equator_interpoint );
const double delta_theta = two_pi / slice_points;
const int ring_first_idx = point_count;
const int lower_ring_first_idx = rings_first_idx.peek();
ASSERTS( ring_count != 1 || slice_points == slice_points_min );
ASSERTS( ring_count != rings + 1 || slice_points == slice_points_min );
rings_first_idx += point_count;
int lower_ring_idx = point_count - 1;
if ( points[lower_ring_idx].theta < two_pi - delta_theta )
lower_ring_idx = lower_ring_first_idx;
const int lower_ring_first_connected = lower_ring_idx;
double lower_ring_angle_adj =
lower_ring_idx == lower_ring_first_idx ? 0 : -two_pi;
const double theta_first = ring_count & 1 ? 0 : delta_theta * 0.5;
int slice_idx = 0;
for ( double theta = theta_first; theta < two_pi - 0.001;
theta += delta_theta )
{
Balloon_Vertex* const point = points.pushi();
point->mass_inv = 1;
point->ring = ring_count;
point->eta = eta;
point->theta = theta;
point->pos = center +
pVect(slice_r * cos(theta), slice_r * sin(theta), z );
point->vel = pVect(0,0,0);
point->edge_out[point->edge_out_count++] =
point_count - 1 + ( slice_idx ? 0 : slice_points );
const double next_theta = theta + delta_theta;
while ( true )
{
point->edge_out[point->edge_out_count++] = lower_ring_idx;
const bool lr_last = lower_ring_idx + 1 == ring_first_idx;
const int next_idx =
lr_last ? lower_ring_first_idx : lower_ring_idx + 1;
Balloon_Vertex* const n_lr_next = &points[next_idx];
const double next_angle_adj =
lower_ring_angle_adj + ( lr_last ? two_pi : 0 );
const double lower_ring_angle = n_lr_next->theta + next_angle_adj;
if ( lower_ring_angle > next_theta + 0.0001 ) break;
if ( slice_idx > 1 &&
lower_ring_idx == lower_ring_first_connected ) break;
lower_ring_idx = next_idx;
lower_ring_angle_adj = next_angle_adj;
}
slice_idx++; point_count++;
}
}
{
Balloon_Vertex* const pole_north = points.pushi();
pole_north->mass_inv = 1;
pole_north->theta = 10;
pole_north->pos = center + pVect(0,0,-r);
pole_north->vel = pVect(0,0,0);
pole_north->ring = ++ring_count;
pole_north->eta = M_PI;
const int lower_ring_first_idx = rings_first_idx.peek();
for ( int lower_ring_idx = lower_ring_first_idx;
lower_ring_idx != point_count; lower_ring_idx++ )
pole_north->edge_out[pole_north->edge_out_count++] = lower_ring_idx;
pole_north->edge_out[pole_north->edge_out_count++] = lower_ring_first_idx;
point_count++;
}
tethered_idx = 0;
float min_y = pole_south->pos.y;
for ( int idx = 0; idx < point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
ASSERTS( p->mass_inv > 0 );
p->mass = 1.0 / p->mass_inv;
if ( p->pos.y < min_y ) { tethered_idx = idx; min_y = p->pos.y; }
for ( int j = 0; j < p->edge_out_count; j++ )
{
const int qi = p->edge_out[j];
Balloon_Vertex* const q = &points[qi];
q->edge_in[q->edge_in_count++] = idx;
}
}
PStack<GLuint> p_indices;
const double tex_eta_min = 0.25 * M_PI;
const double tex_eta_max = 0.75 * M_PI;
const double tex_theta_min = 0;
const double tex_theta_max = two_pi;
const double eta_to_s = 1.0 / ( tex_eta_max - tex_eta_min );
const double theta_to_s = 1.0 / ( tex_theta_max - tex_theta_min );
PStack<float> gpu_tex_coords;
for ( int idx = 0; idx < point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
pColor color;
p->tex_coor.x = ( p->theta - tex_theta_min ) * theta_to_s;
p->tex_coor.y = ( p->eta - tex_eta_min ) * eta_to_s;
gpu_tex_coords += p->tex_coor.x;
gpu_tex_coords += p->tex_coor.y;
switch ( p->ring & 0x3 ) {
case 0: color = pColor(0.9,.1,.1); break;
case 1: color = pColor(0.1,0.9,0.1); break;
case 2: color = pColor(0.1,0.1,0.9); break;
case 3: color = pColor(0.1,0.9,0.9); break;
default: color = pColor(0.5,0.5,0.5); break;
}
const bool within_ring_first = !idx || p->ring != points[idx-1].ring;
const bool within_ring_last =
idx + 1 == point_count || p->ring != points[idx+1].ring;
for ( int e = 0; e < p->edge_out_count - 1; e++ )
{
const int qi = p->edge_out[e];
Balloon_Vertex* const q = &points[qi];
const int ri = p->edge_out[e+1];
Balloon_Vertex* const r = &points[ri];
for ( int qe=0; qe < q->edge_out_count-1; qe++ )
{ ASSERTS( q->edge_out[qe] != ri ); }
for ( int re=0; re < r->edge_out_count-1; re++ )
{ ASSERTS( r->edge_out[re] != idx ); }
const int tri_idx = triangles.occ();
Balloon_Triangle* const tri = triangles.pushi();
tri->pi = idx; tri->qi = qi; tri->ri = ri;
tri->color =
within_ring_first && e == 0 ? pColor(0.9,0.9,0.9) :
within_ring_last && e == p->edge_out_count -2
? pColor(0.1,0.1,0.1) : color;
p->triangles += tri_idx;
p_indices += idx;
p_indices += qi;
p_indices += ri;
}
}
tri_count = triangles.occ();
for ( int idx=0; idx<tri_count; idx++ )
{
Balloon_Triangle* const tri = &triangles[idx];
points[tri->qi].triangles += idx;
points[tri->ri].triangles += idx;
}
for ( int i=0; i<tri_count; i++ )
{
Balloon_Triangle* const tri = &triangles[i];
const int pi = tri->pi;
const int qi = tri->qi;
const int ri = tri->ri;
int opp_p = -1, opp_q = -1, opp_r = -1;
for ( int j=0; j<tri_count; j++ )
{
if ( i == j ) continue;
Balloon_Triangle* const tri2 = &triangles[j];
if ( opp_r < 0 ) opp_r = tri2->third_vtx(pi,qi);
if ( opp_p < 0 ) opp_p = tri2->third_vtx(ri,qi);
if ( opp_q < 0 ) opp_q = tri2->third_vtx(ri,pi);
}
ASSERTS( opp_p != -1 && opp_q != -1 && opp_r != -1 );
#define OP_SET(v) \
tri->v##i_opp = opp_##v; \
tri->v##i_less = v##i < opp_##v; \
if ( tri->v##i_less ) \
{ \
Balloon_Rep_Pair* const rp = rep_pairs.pushi(); \
rp->pi = v##i; rp->qi = opp_##v; \
}
OP_SET(p); OP_SET(q); OP_SET(r);
}
rep_pair_count = rep_pairs.occ();
texid_pse = pBuild_Texture_File("mult.png",false,255);
// texid_pse = pBuild_Texture_File("shot-emacs.png",false,255);
tex_coords.take(gpu_tex_coords,GL_STATIC_DRAW);
tex_coords.to_gpu();
if ( 1 )
texid_syl = pBuild_Texture_File("gp.png",false,255);
else
texid_syl = pBuild_Texture_File
("/home/faculty/koppel/teach/gpup09/gpup.png",false,255);
point_indices.take(p_indices,GL_STATIC_DRAW,GL_ELEMENT_ARRAY_BUFFER);
point_indices.to_gpu();
glp_vtx_data.alloc(point_count,GL_DYNAMIC_COPY);
glp_vtx_data.prepare_two_buffers();
glp_tri_data.alloc(tri_count,GL_DYNAMIC_COPY);
glp_tri_data.prepare_two_buffers();
glp_vtx_strc.alloc(point_count,GL_STATIC_DRAW);
glp_tri_strc.alloc(tri_count,GL_STATIC_DRAW);
cuda_tri_strc.alloc(tri_count);
cuda_vtx_strc.alloc(point_count);
for ( int idx = 0; idx < point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
GLP_Vtx_Strc* const vd = &glp_vtx_strc[idx];
CUDA_Vtx_Strc* const vc = &cuda_vtx_strc[idx];
vd->self_idx = float( idx );
vd->left_idx = 0.5;
int np = 0;
for ( int ti = 0; p->triangles.iterate(ti); )
{
Balloon_Triangle* const tri = &triangles[ti];
const int pos = tri->pi == idx ? 0 : tri->qi == idx ? 1 : 2;
const int pos_packed = 4 * ti + pos;
vd->neighbors[np] = pos_packed;
ASSERTS( vd->neighbors[np] == pos_packed );
np++;
}
ASSERTS( np < VTX_TRI_DEG_MAX );
while ( np < VTX_TRI_DEG_MAX ) vd->neighbors[np++] = -1;
typeof vc->n0* const vcn = &vc->n0;
for ( int i=0; i<VTX_TRI_DEG_MAX; i++ ) vcn[i] = vd->neighbors[i];
}
for ( int idx=0; idx<tri_count; idx++ )
{
Balloon_Triangle* const tri = &triangles[idx];
GLP_Tri_Strc* const td = &glp_tri_strc[idx];
CUDA_Tri_Strc* const tc = &cuda_tri_strc[idx];
# define CPY_IDX(I) \
td->I = (typeof td->I)(tri->I); tc->I = (typeof tc->I)(tri->I);
CPY_IDX(pi); CPY_IDX(qi); CPY_IDX(ri);
CPY_IDX(pi_opp); CPY_IDX(qi_opp); CPY_IDX(ri_opp);
CPY_IDX(length_relaxed);
# undef CPY_IDX
}
}
void
Balloon::update_for_config()
{
temp_ratio = temperature / 300;
point_mass = surface_mass / point_count;
point_mass_inv = 1.0 / point_mass;
damping_factor_per_step = pow(damping_factor,world.delta_t);
const double ell = pow(volume,1/3.);
const double a = sqrt( 2.0 * ell * spring_constant * point_mass_inv );
oversample = M_PI / ( 2 * a * world.delta_t );
tightness = a;
gas_m_over_temp = 0.2 * gas_particle_mass / temp_ratio;
pressure_factor_coeff = gas_amount * temp_ratio;
const double mass_gas =
( pressure_gas(centroid.y - 0.5, pressure_factor_coeff)
- pressure_gas(centroid.y + 0.5, pressure_factor_coeff) )
/ opt_gravity_accel;
gas_mass_per_vertex = mass_gas / point_count;
}
void
World::modelview_update()
{
pMatrix_Translate center_eye(-eye_location);
pMatrix_Rotation rotate_eye(eye_direction,pVect(0,0,-1));
modelview = rotate_eye * center_eye;
shadow_update();
}
void
World::shadow_update()
{
// These routines need to be made more general.
pCoor platform_point(platform_xmin,0,platform_zmin);
pVect platform_normal(0,1,0);
shadow_transform_create(modelview_shadow,light_location);
pCoor eye_loc_mirror(eye_location.x, -eye_location.y, eye_location.z);
pMatrix reflect; reflect.set_identity(); reflect.rc(1,1) = -1;
transform_mirror = modelview * reflect * invert(modelview);
}
void
World::shadow_transform_create(pMatrix& m, pCoor light_location)
{
pVect platform_normal(0,1,0);
pVect eye_normal(0,0,-1);
pMatrix_Translate center_light(-light_location);
pNorm axis(-platform_normal,eye_normal);
const double angle = asin(axis.magnitude);
pMatrix_Rotation rotate_platform(axis,angle);
pMatrix frustum; frustum.set_zero();
#if 0
const bool light_at_infinity = light_location.w == 0;
if ( light_at_infinity )
{
frustum.rc(0,0) = frustum.rc(1,1) = frustum.rc(3,3) = 1;
}
else
#endif
{
frustum.rc(0,0) = frustum.rc(1,1) = light_location.y;
frustum.rc(3,2) = -1;
}
pMatrix_Translate restore_z(0,0,-light_location.y);
pMatrix step1 = rotate_platform * center_light;
pMatrix to_platform = restore_z * frustum * rotate_platform * center_light;
pMatrix_Rotation un_rotate_platform(axis,-angle);
pMatrix_Translate un_center_light(light_location);
pMatrix from_platform = un_center_light * un_rotate_platform;
pMatrix project = from_platform * to_platform;
modelview_shadow = modelview * from_platform * to_platform;
// Compute coordinates to help with debugging.
//
pCoor test_pt(1.1,0,2.2);
pCoor test_pt2(1.1,1,2.2);
pCoor test_pt_a = step1 * test_pt;
pCoor test_pt_b = to_platform * test_pt; test_pt_b.homogenize();
pCoor test_pt_pr = project * test_pt; test_pt_pr.homogenize();
pCoor test_pt2_pr = project * test_pt2; test_pt2_pr.homogenize();
}
void
Balloon::translate(pVect amt)
{
gpu_data_to_cpu();
for ( int idx = 0; idx < point_count; idx++ )
points[idx].pos += amt;
data_location = DL_CPU;
data_bid = 0;
}
void
Balloon::push(pVect amt)
{
gpu_data_to_cpu();
for ( int idx = 0; idx < point_count; idx++ )
points[idx].vel += amt;
data_location = DL_CPU;
data_bid = 0;
}
void
Balloon::time_step_cpu(int steps)
{
for ( int i=0; i<steps; i++ ) time_step_cpu_once();
}
void
Balloon::time_step_cpu_once()
{
const double friction_coefficient = 0.04;
const double bounce_factor = 0.0;
const double delta_t = world.delta_t;
pVect gravity(0,-opt_gravity_accel,0);
if ( !opt_gravity ) gravity = pVect(0,0,0);
const bool first_iteration = cpu_iteration == 0;
cpu_iteration++;
need_cpu_iteration = false;
double volume_x2 = 0;
double area_x2 = 0;
double kinetic_energy_total = 0;
double spring_energy_factor_total = 0;
pVect surface_error2(0,0,0);
centroid = pCoor(0,0,0,0);
weight = pVect(0,0,0);
for ( int idx = 0; idx < point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
centroid += p->pos;
kinetic_energy_total += p->vel.mag();
p->force_spring = pVect(0,0,0);
p->force_rep = pVect(0,0,0);
p->surface_normal = pVect(0,0,0);
}
centroid.homogenize();
for ( int i = 0; i < tri_count; i++ )
{
Balloon_Triangle* const tri = &triangles[i];
Balloon_Vertex* const p = &points[tri->pi];
Balloon_Vertex* const q = &points[tri->qi];
Balloon_Vertex* const r = &points[tri->ri];
pCoor center = 1./3 * ( p->pos + q->pos + r->pos );
pVect pqr_cross(q->pos,p->pos,r->pos);
p->surface_normal += pqr_cross;
q->surface_normal += pqr_cross;
r->surface_normal += pqr_cross;
surface_error2 += pqr_cross;
const float tower_volume_x2 = -pqr_cross.y * center.y;
volume_x2 += tower_volume_x2;
const float triangle_area_x2 = pqr_cross.mag();
area_x2 += triangle_area_x2;
pVect p_to_c(p->pos,center);
pVect q_to_c(q->pos,center);
pVect r_to_c(r->pos,center);
const float perimeter = p_to_c.mag() + q_to_c.mag() + r_to_c.mag();
if ( length_relaxed_update )
{
tri->length_relaxed =
first_iteration ? perimeter * 0.5 : perimeter;
}
const float eff_length = max(0.0f, perimeter - tri->length_relaxed );
const float spring_force = eff_length * spring_constant;
p->force_spring += spring_force * p_to_c;
q->force_spring += spring_force * q_to_c;
r->force_spring += spring_force * r_to_c;
const double spring_energy = eff_length;
spring_energy_factor_total += spring_energy;
}
for ( int i=0; i<rep_pair_count; i++ )
{
Balloon_Rep_Pair* const rp = &rep_pairs[i];
Balloon_Vertex* const p = &points[rp->pi];
Balloon_Vertex* const q = &points[rp->qi];
pNorm p_to_q(p->pos,q->pos);
const double dist_sq_inv = rep_constant / max(0.001,p_to_q.mag_sq);
pVect rep_force(dist_sq_inv * p_to_q);
p->force_rep -= rep_force;
q->force_rep += rep_force;
}
length_relaxed_update = false;
volume = volume_x2 / 2.0;
area = area_x2 / 2.0;
if ( first_iteration )
{
const float exp_air = pressure_air(centroid.y);
const float exp_gas = pressure_gas(centroid.y,1);
double pf_sum = 0; // Pressure factor.
double area_sum_x6 = 0;
for ( int i=0; i<point_count; i++ )
{
Balloon_Vertex* const p = &points[i];
pNorm inward(p->surface_normal);
const double pf_balance = dot(p->force_spring,inward);
pf_sum += pf_balance;
area_sum_x6 += inward.magnitude;
}
const double area_sum = area_sum_x6 / 6;
gas_amount = damping_v *
( pf_sum / area_sum + exp_air ) * volume / ( temp_ratio * exp_gas );
update_for_config(); // Recompute pressure_factor_coeff.
}
pressure_compute();
const double spring_energy =
12 * pow(point_mass,-0.5) * spring_energy_factor_total;
const double kinetic_energy =
point_mass * kinetic_energy_total;
e_spring = spring_energy;
e_kinetic = kinetic_energy;
energy = e_spring + e_kinetic;
pVect surface_error(0,0,0);
for ( int i=0; i<point_count; i++ )
{
Balloon_Vertex* const p = &points[i];
surface_error += p->surface_normal;
}
for ( int i=0; i<point_count; i++ )
{
Balloon_Vertex* const p = &points[i];
const float gas_pressure = pressure_gas(p->pos.y);
const float air_pressure = pressure_air(p->pos.y);
p->surface_normal *= 1./6;
p->force_pressure =
( air_pressure - gas_pressure ) * p->surface_normal;
p->force = p->force_pressure;
pNorm vel_norm(-p->vel);
const double facing_area = max(0.0,dot(vel_norm,p->surface_normal));
pVect force_ar = - air_resistance * facing_area * p->vel;
pVect gforce = point_mass * p->mass * gravity;
p->force += gforce;
weight += p->force;
p->force += force_ar;
pVect force_ns = p->force; // Force non-spring.
pVect force_s = p->force_spring + p->force_rep;
p->force += force_s;
const float mass_wgas_inv_dt =
delta_t / ( point_mass * p->mass + gas_mass_per_vertex );
pVect delta_vns = mass_wgas_inv_dt * force_ns;
pVect delta_vs = mass_wgas_inv_dt * force_s;
pVect delta_v = delta_vns + delta_vs;
// pVect pos_verlet = p->pos - pos_prev + delta_t * delta_v;
p->pos_prev = p->pos;
p->pos += ( p->vel + 0.5 * delta_v ) * delta_t;
p->vel += damping_v * delta_vs + delta_vns;
}
if ( opt_damping )
{
pVect vel_avg = velocity_avg();
for ( int i=0; i<point_count; i++ )
{
Balloon_Vertex* const p = &points[i];
pVect local_vel = p->vel - vel_avg;
p->vel = damping_factor_per_step * local_vel;
}
pVect vel_avg2 = velocity_avg();
pVect vel_fix = vel_avg - vel_avg2;
for ( int i=0; i<point_count; i++ ) points[i].vel += vel_fix;
}
if ( first_iteration ) e_zero = energy;
for ( int idx = 0; idx < point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
if ( p->pos.x < world.platform_xmin
|| p->pos.x > world.platform_xmax
|| p->pos.z < world.platform_zmin
|| p->pos.z > world.platform_zmax ) continue;
if ( p->pos.y > 0 ) continue;
if ( p->pos_prev.y < 0 ) continue;
p->pos.y = 0;
p->vel.y = - bounce_factor * p->vel.y;
const float gas_pressure = pressure_gas(p->pos.y);
pVect gforce = point_mass * p->mass * gravity;
const float f_y =
gforce.y + p->force_spring.y - gas_pressure * p->surface_normal.y;
if ( f_y >= 0 ) continue;
const float friction_force = -f_y * friction_coefficient;
const float delta_v = friction_force * delta_t / ( point_mass*p->mass );
const pNorm xzvel(p->vel.x,0,p->vel.z);
if ( xzvel.magnitude <= delta_v ) {
p->vel.x = 0; p->vel.z = 0;
}
else
p->vel -= delta_v * xzvel;
}
}
#define TRY_XF_FEEDBACK(routine,vertex_count) \
for ( int feedback_tries = 0; ; feedback_tries++ ) \
{ \
bool check = false; \
glBeginTransformFeedbackNV(GL_POINTS); pError_Check(); \
if ( check ) \
glBeginQuery \
(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN_NV, \
query_transform_feedback_id ); \
routine; \
glEndTransformFeedbackNV(); pError_Check(); \
if ( !check ) break; \
glEndQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN_NV); \
int done_points = -1; \
glGetQueryObjectiv \
(query_transform_feedback_id,GL_QUERY_RESULT,&done_points); \
pError_Check(); \
if ( vertex_count == done_points ) break; \
printf("Warning, xf only got %d points, trying again.\n",done_points); \
if ( feedback_tries > 10 ) pError_Exit(); \
}
void
Balloon::time_step_gpu(int steps)
{
if ( world.opt_physics_method == GP_glp ) time_step_glp(steps);
else time_step_cuda(steps);
}
void
Balloon::init_glp()
{
glp_initialized = true;
glGenTextures(1,&glp_tri_data_tid);
glGenTextures(1,&glp_vtx_data_tid);
pError_Check();
glGenQueries(1,&query_transform_feedback_id);
pError_Check();
glGenFramebuffersEXT(1,&framebuffer_id);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer_id);
glGenRenderbuffersEXT(1, &renderbuffer_id);
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, renderbuffer_id);
// GL_MAX_RENDERBUFFER_SIZE_EXT Maximum size of either dimension.
glRenderbufferStorageEXT // p 610
(GL_RENDERBUFFER_EXT, GL_FLOAT_RGBA_NV, 2, 2);
glFramebufferRenderbufferEXT
(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT,
renderbuffer_id);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0 );
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0 );
pError_Check();
vs_plan_c.init("balloon-shader.cc","main_physics_plan_c();");
stx_data_vtx = vs_plan_c.uniform_location("tex_data_vtx");
stx_data_tri = vs_plan_c.uniform_location("tex_data_tri");
sun_constants_sc = vs_plan_c.uniform_location("constants_sc");
sun_constants_gas = vs_plan_c.uniform_location("constants_gas");
sun_constants_dt = vs_plan_c.uniform_location("constants_dt");
sun_platform = vs_plan_c.uniform_location("platform");
sat_volume = vs_plan_c.attribute_location("volume");
sat_indices = vs_plan_c.attribute_location("in_indices");
sat_pos = vs_plan_c.attribute_location("in_pos");
sat_vel = vs_plan_c.attribute_location("in_vel");
svl_surface_normal = vs_plan_c.varying_location("out_surface_normal");
svl_force_or_v = vs_plan_c.varying_location("out_force_or_v");
svl_pos = vs_plan_c.varying_location("out_pos");
svl_force_r = vs_plan_c.varying_location("out_force_r");
vs_plan_c.print_active_varying();
vs_plan_c.validate_once();
}
void
Balloon::time_step_glp(int steps)
{
if ( !glp_initialized ) init_glp();
cpu_data_to_glp();
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer_id);
glDisable(GL_DEPTH_TEST);
glDisable(GL_STENCIL_TEST);
glDisable(GL_TEXTURE_2D);
glDisable(GL_ALPHA_TEST);
glEnable(GL_BLEND);
glBlendEquation(GL_FUNC_ADD);
glBlendFunc(GL_ONE,GL_ONE);
glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB,GL_FALSE);
pError_Check();
glClearColor( 0.0, 0.0, 0.0, 0.0 );
pError_Check();
vs_plan_c.use();
glUniform1i(stx_data_vtx,0);
glUniform1i(stx_data_tri,1);
glUniform4f
(sun_constants_sc,spring_constant,damping_v,
pressure_factor_coeff,
gas_m_over_temp);
glUniform4f
(sun_constants_gas,
air_resistance, gas_mass_per_vertex,
air_particle_mass, opt_gravity ? opt_gravity_accel : 0.0 );
glUniform4f
(sun_constants_dt,
world.delta_t, rep_constant, point_mass, point_mass_inv);
glUniform4f
(sun_platform,
world.platform_xmin, world.platform_xmax,
world.platform_zmin, world.platform_zmax);
const GLint svl_p1[] =
{ svl_surface_normal, svl_force_or_v, svl_pos, svl_force_r };
glTransformFeedbackVaryingsNV
(vs_plan_c.pobject, 4, &svl_p1[0], GL_INTERLEAVED_ATTRIBS_NV);
pError_Check();
GLP_Vtx_Data before = glp_vtx_data.data[0];
if ( steps ) data_location = DL_GLP;
pError_Check();
glEnableClientState(GL_VERTEX_ARRAY);
glActiveTexture(GL_TEXTURE0); pError_Check();
glBindTexture(GL_TEXTURE_BUFFER_EXT,glp_vtx_data_tid); pError_Check();
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_BUFFER_EXT,glp_tri_data_tid); pError_Check();
glEnableVertexAttribArray(sat_indices);
for ( int i=0; i<steps; i++ )
{
const bool skip_volume = i + 1 != steps && true && ( i & 0x3 );
//
// Pass 1, Triangles
//
glActiveTexture(GL_TEXTURE0);
glTexBufferEXT // Attaches to the active buffer texture.
(GL_TEXTURE_BUFFER_EXT, GL_RGBA32F_ARB, glp_vtx_data.bid_read());
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_BUFFER_EXT,0);
const int tstride = sizeof(glp_tri_strc[0]);
glp_tri_strc.bind();
glVertexAttribIPointerEXT(sat_indices, 4, GL_INT, tstride, (void*)16);
glVertexPointer(4, GL_FLOAT, sizeof(glp_tri_strc[0]), 0);
glBindBuffer(GL_ARRAY_BUFFER,0);
glBindBufferBaseNV
(GL_TRANSFORM_FEEDBACK_BUFFER_NV, 0, glp_tri_data.bid_fresh());
pError_Check();
if ( skip_volume )
{
glEnable(GL_RASTERIZER_DISCARD_NV);
}
else
{
glDisable(GL_RASTERIZER_DISCARD_NV);
glClear(GL_COLOR_BUFFER_BIT);
}
TRY_XF_FEEDBACK( glDrawArrays(GL_POINTS,0,tri_count), tri_count);
if ( !skip_volume )
{
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); pError_Check();
pCoor pb[4];
glReadPixels(0,0,2,2,GL_RGBA,GL_FLOAT,&pb[0]);
pError_Check();
centroid = (1.0/tri_count)*pb[0];
volume = 0.5 * pb[0].w;
}
if ( false )
{
glp_tri_data.from_gpu();
GLP_Tri_Data after_sf = glp_tri_data.data[0];
pError_Msg("Check.");
}
//
// Pass 2, Vertices
//
glEnable(GL_RASTERIZER_DISCARD_NV);
glVertexAttrib1f(sat_volume,volume); pError_Check();
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_BUFFER_EXT,0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_BUFFER_EXT,glp_tri_data_tid); pError_Check();
glTexBufferEXT
(GL_TEXTURE_BUFFER_EXT, GL_RGBA32F_ARB, glp_tri_data.bid);
pError_Check();
glp_vtx_strc.bind();
const int vstride = sizeof(glp_vtx_strc[0]);
glVertexPointer(2, GL_FLOAT, vstride, 0);
glVertexAttribIPointerEXT(sat_indices, 4, GL_INT, vstride, (void*)8);
const int dvstride = sizeof(GLP_Vtx_Data);
glp_vtx_data.bind();
glVertexAttribPointer(sat_vel, 4, GL_FLOAT, false, dvstride, (void*)16);
glVertexAttribPointer(sat_pos, 4, GL_FLOAT, false, dvstride, (void*)32);
glEnableVertexAttribArray(sat_pos);
glEnableVertexAttribArray(sat_vel);
glBindBuffer(GL_ARRAY_BUFFER,0);
glBindBufferBaseNV
(GL_TRANSFORM_FEEDBACK_BUFFER_NV, 0, glp_vtx_data.bid_write());
pError_Check();
TRY_XF_FEEDBACK( glDrawArrays(GL_POINTS,0,point_count), point_count);
glDisableVertexAttribArray(sat_pos);
glDisableVertexAttribArray(sat_vel);
glp_vtx_data.bid_swap();
}
if ( false )
{
GLP_Vtx_Data after = glp_vtx_data.data[0];
pError_Msg("Check.");
}
world.vs_fixed.use();
glDisableClientState(GL_VERTEX_ARRAY);
glDisableVertexAttribArray(sat_indices);
glBindBuffer(GL_ARRAY_BUFFER,0);
glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB,GL_TRUE);
glDisable(GL_RASTERIZER_DISCARD_NV);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_BUFFER_EXT,0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_BUFFER_EXT,0);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0 );
data_bid = glp_vtx_data.bid_read();
data_stride = sizeof(glp_vtx_data[0]);
pressure_compute();
pError_Check();
}
void
Balloon::cuda_data_partition()
{
const int block_lg = CUDA_VTX_BLOCK_LG;
const int block_size = 1 << block_lg;
const int block_mask = block_size - 1;
const int block_count = int(0.9999 + double(point_count) / block_size);
for ( int i=0; i<tri_count; i++ ) triangles[i].block_number = -1;
PStack<CUDA_Tri_Work_Strc> tri_work;
PStack<int> work_sizes;
int work_max = 0;
for ( int i=0; i<block_count; i++ )
{
const int start = i * block_size;
const int stop = start + block_size;
int uniq_tris = 0;
for ( int vi=start; vi<stop; vi++ )
{
Balloon_Vertex* const p = &points[vi];
for ( int ti = 0; p->triangles.iterate(ti); )
{
Balloon_Triangle* const tri = &triangles[ti];
if ( tri->block_number == i ) continue;
tri->block_number = i;
uniq_tris++;
CUDA_Tri_Work_Strc* const tw = tri_work.pushi();
tw->pi = int(tri->pi); tw->qi = int(tri->qi);
tw->ri = int(tri->ri);
tw->vi_opp0 = tw->vi_opp1 = tw->vi_opp2 = -1;
tw->length_relaxed = float(ti);
}
}
work_sizes += uniq_tris;
work_max = max(work_max,uniq_tris);
}
tri_work_per_vtx_lg = int(0.998 + log2(double(work_max)/block_size));
tri_work_per_vtx = 1 << tri_work_per_vtx_lg;
ASSERTS( tri_work_per_vtx_lg <= 3 );
const int work_per_block = tri_work_per_vtx * block_size;
const int work_count = work_per_block * block_count;
cuda_tri_work_strc.alloc( work_count );
int ci = 0, wi = 0;
CUDA_Tri_Work_Strc tw_pad; memset(&tw_pad,-1,sizeof(tw_pad));
int waste = 0;
for ( int amt = 0; work_sizes.iterate(amt); )
{
for ( int i=0; i<amt; i++ ) cuda_tri_work_strc[ci++] = tri_work[wi++];
int pad = work_per_block - amt;
waste += pad;
while ( pad-- ) cuda_tri_work_strc[ci++] = tw_pad;
}
for ( int i=0; i<work_count; i++ ) cuda_tri_work_strc[i].pull_i = 0;
int max_pull = 0;
const int pull_limit = 4;
for ( int i=0; i<work_count; i++ )
{
CUDA_Tri_Work_Strc* const tw = &cuda_tri_work_strc[i];
if ( tw->pi == -1 ) continue;
const int ti = int(tw->length_relaxed);
Balloon_Triangle* const tri = &triangles[ti];
typeof tri->pi_opp* const t_optr = &tri->pi_opp;
const int round = i % tri_work_per_vtx;
const int blk = i / tri_work_per_vtx >> block_lg;
const int tid = i / tri_work_per_vtx & block_mask;
typeof tw->pi* const w_vptr = &tw->pi;
for ( int v=0; v<3; v++ )
{
const int vi = w_vptr[v];
const int vi_blk = vi >> block_lg;
if ( vi_blk != blk ) continue;
const int vi_tri = vi * tri_work_per_vtx + round;
CUDA_Tri_Work_Strc* const tv = &cuda_tri_work_strc[vi_tri];
typeof tv->vi_opp0* const w_optr = &tv->vi_opp0;
typeof tv->pull_tid_0* const w_pptr = &tv->pull_tid_0;
ASSERTS( tv->pull_i < pull_limit );
w_optr[tv->pull_i] = t_optr[v];
w_pptr[tv->pull_i] = tid;
tv->pull_i++;
if ( tv->pull_i > max_pull ) max_pull = tv->pull_i;
}
}
for ( int i=0; i<work_count; i++ )
{
CUDA_Tri_Work_Strc* const tw = &cuda_tri_work_strc[i];
tw->pull_i <<= 1;
if ( tw->pi == -1 ) continue;
const int ti = int(tw->length_relaxed);
Balloon_Triangle* const tri = &triangles[ti];
if ( tri->block_number != block_count )
{
tw->pull_i |= 1;
tri->block_number = block_count;
}
tw->length_relaxed = tri->length_relaxed;
}
}
template <typename T> void to_dev_ds(const char* const dst_name, T src)
{
T cpy = src;
CE(cudaMemcpyToSymbol(dst_name, &cpy, sizeof(T), 0, cudaMemcpyHostToDevice));
}
void to_dev_ds(const char* const dst_name, double& src)
{
ASSERTS( false );
}
#define TO_DEV_DS(dst,src) to_dev_ds(#dst,src);
#define TO_DEV(var) to_dev_ds<typeof var>(#var,var)
#define TO_DEV_OM(obj,memb) to_dev_ds(#memb,obj.memb)
#define TO_DEV_OM_F(obj,memb) to_dev_ds(#memb,float(obj.memb))
void
Balloon::init_cuda()
{
cuda_initialized = true;
cuda_constants_stale = true;
int device_count;
cudaGetDeviceCount(&device_count);
ASSERTS( device_count );
const int dev = 0;
cudaDeviceProp prop;
CE(cudaGetDeviceProperties(&prop,dev));
CE(cudaGLSetGLDevice(dev));
printf
("GPU: %s @ %.2f GHz WITH %d MiB GLOBAL MEM\n",
prop.name, prop.clockRate/1e6, prop.totalGlobalMem >> 20);
printf
("CAP: %d.%d NUM MP: %d TH/BL: %d SHARED: %d CONST: %d "
"# REGS: %d\n",
prop.major, prop.minor,
prop.multiProcessorCount, prop.maxThreadsPerBlock,
prop.sharedMemPerBlock, prop.totalConstMem,
prop.regsPerBlock
);
CE(cudaEventCreate(&world.frame_start_ce));
CE(cudaEventCreate(&world.frame_stop_ce));
cuda_vtx_strc.to_cuda();
TO_DEV_DS(vtx_strc,cuda_vtx_strc.get_dev_addr());
cuda_tri_strc.to_cuda();
TO_DEV_DS(tri_strc,cuda_tri_strc.get_dev_addr());
cuda_tri_data.alloc(tri_count);
TO_DEV_DS(tri_data,cuda_tri_data.get_dev_addr());
cuda_vtx_data.alloc(point_count);
const int max_block_count =
int(0.5
+ double(tri_count)/min(CUDA_TRI_BLOCK_SIZE,CUDA_VTX_BLOCK_SIZE));
cuda_tower_volumes.alloc(max_block_count);
TO_DEV_DS(tower_volumes,cuda_tower_volumes.get_dev_addr());
cuda_centroid_parts.alloc(max_block_count);
TO_DEV_DS(centroid_parts,cuda_centroid_parts.get_dev_addr());
cuda_data_partition();
cuda_tri_work_strc.to_cuda();
TO_DEV_DS(tri_work_strc,cuda_tri_work_strc.get_dev_addr());
TO_DEV(tri_work_per_vtx);
TO_DEV(tri_work_per_vtx_lg);
Dg_tri.x = int(ceil(double(tri_count)/CUDA_TRI_BLOCK_SIZE));
Dg_tri.y = Dg_tri.z = 1;
Db_tri.x = CUDA_TRI_BLOCK_SIZE;
Db_tri.y = Db_tri.z = 1;
Dg_vtx.x = int(ceil(double(point_count)/CUDA_VTX_BLOCK_SIZE));
Dg_vtx.y = Dg_vtx.z = 1;
Db_vtx.x = CUDA_VTX_BLOCK_SIZE;
Db_vtx.y = Db_vtx.z = 1;
for ( int i=0; i<2; i++ )
{
dim3 Dg = i==0 ? Dg_tri : Dg_vtx;
dim3 Db = i==0 ? Db_tri : Db_vtx;
const char* const pass = i==0 ? "Triangle" : "Vertex";
const double warp_per_block = double(Db.x) / 32;
const double block_per_mp = double(Dg.x) / prop.multiProcessorCount;
const double warp_per_mp = ceil(warp_per_block) * block_per_mp;
printf("\n%s pass block size %d thds, %d warps.\n",
pass, Db.x, Db.x >> 5);
printf("Grid size %d blks, %.1f blks / MP, %.1f warps / MP.\n",
Dg.x,
block_per_mp, warp_per_mp );
const double warp_util = double(min(32u,Db.x))/32;
const int data_dist = 2;
const int latency_fp_warps = 24 / 4;
const int latency_mem_warps = 400 / 4;
// Assume no switch between blocks to hide fp latency.
//
const double lat_coverage_fp =
min( warp_per_block * data_dist / latency_fp_warps, 1.0 );
// Assume gpu does switch between blocks to hide mem latency.
//
const double lat_coverage_mem =
min( warp_per_mp * data_dist / latency_mem_warps, 1.0 );
printf("Approx CP util fp %.3f * %.3f = %.3f (%.3f) for avg dist %d\n",
warp_util, lat_coverage_fp,
warp_util * lat_coverage_fp,
warp_util * lat_coverage_fp * prop.multiProcessorCount,
data_dist);
printf("Approx CP util mem %.3f * %.3f = %.3f (%.3f) for avg dist %d\n",
warp_util, lat_coverage_mem,
warp_util * lat_coverage_mem,
warp_util * lat_coverage_mem * prop.multiProcessorCount,
data_dist);
}
}
void
Balloon::time_step_cuda(int steps)
{
static int cuda_iteration = 0;
cuda_iteration++;
if ( !cuda_initialized ) init_cuda();
CE(cudaEventRecord(world.frame_start_ce,0));
cpu_data_to_cuda();
if ( cuda_constants_stale )
{
cuda_constants_stale = false;
TO_DEV_DS(volume_cpu,volume); // For debugging, not used by gpu.
TO_DEV(tri_count);
TO_DEV(point_count);
TO_DEV(spring_constant);
TO_DEV(damping_v);
TO_DEV(pressure_factor_coeff);
TO_DEV(gas_m_over_temp);
TO_DEV(air_resistance);
TO_DEV(gas_mass_per_vertex);
TO_DEV(air_particle_mass);
float gravity_mag = opt_gravity ? opt_gravity_accel : 0;
TO_DEV(gravity_mag);
TO_DEV(opt_gravity);
TO_DEV_OM_F(world,delta_t);
TO_DEV(rep_constant);
TO_DEV(point_mass);
TO_DEV(point_mass_inv);
TO_DEV_OM(world,platform_xmin);
TO_DEV_OM(world,platform_xmax);
TO_DEV_OM(world,platform_zmin);
TO_DEV_OM(world,platform_zmax);
cuda_tower_volumes.set_primary();
}
const CUDA_Vtx_Data vtest = cuda_vtx_data[42];
if ( steps ) data_location = DL_CUDA;
const bool two_pass = world.opt_physics_method == GP_cuda_2_pass;
for ( int i=0; i<steps; i++ )
{
CUDA_Vtx_Data* const vtx_data_in_d = cuda_vtx_data.get_dev_addr_read();
CUDA_Vtx_Data* const vtx_data_out_d = cuda_vtx_data.get_dev_addr_write();
if ( two_pass )
{
pass_triangles_launch
(Dg_tri, Db_tri, vtx_data_in_d, cuda_vtx_data.chars);
pass_vertices_launch
(Dg_vtx, Db_vtx,
cuda_tri_data.get_dev_addr(), vtx_data_out_d,
cuda_tri_data.chars);
}
else
{
float* const tv_in = cuda_tower_volumes.get_dev_addr_read();
float* const tv_out = cuda_tower_volumes.get_dev_addr_write();
pass_unified_launch
(Dg_vtx, Db_vtx,
vtx_data_in_d, vtx_data_out_d, tv_in, tv_out,
cuda_tri_data.chars, cuda_vtx_data.chars);
cuda_tower_volumes.swap();
}
cuda_vtx_data.swap();
}
if ( false )
{
// Due to a CUDA bug this is slower than just copying through host.
cuda_vtx_data.cuda_to_gl();
// cuda_centroid_parts.from_cuda();
data_bid = cuda_vtx_data.bid;
data_stride = sizeof(cuda_vtx_data[0]);
}
else
{
cuda_data_to_cpu();
}
{
cuda_tower_volumes.from_cuda();
const int blocks = two_pass ? Dg_tri.x : Dg_vtx.x;
volume = 0;
for ( int i=0; i<blocks; i++ ) volume += cuda_tower_volumes[i];
}
CE(cudaEventRecord(world.frame_stop_ce,0));
CE(cudaEventSynchronize(world.frame_stop_ce));
float cuda_time = -1.1;
CE(cudaEventElapsedTime(&cuda_time,world.frame_start_ce,world.frame_stop_ce));
world.frame_timer.cuda_frame_time_set(cuda_time);
if ( !opt_cpu_interleave )
{
centroid_compute();
pressure_compute();
}
CUDA_Vtx_Data vtest_aftera = cuda_vtx_data[42];
#undef TO_DEV
#undef TO_DEV_OM
}
void
Balloon::gpu_data_to_cpu()
{
if ( world.opt_physics_method == GP_glp ) glp_data_to_cpu();
else cuda_data_to_cpu();
}
void
Balloon::glp_data_to_cpu()
{
if ( data_location & DL_CPU ) return;
data_location |= DL_CPU;
glp_vtx_data.from_gpu();
for ( int idx=0; idx<point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
GLP_Vtx_Data* const g = &glp_vtx_data[idx];
p->pos = g->pos;
p->vel = g->vel;
p->surface_normal = g->surface_normal;
}
}
void
Balloon::cuda_data_to_cpu()
{
if ( data_location & DL_CPU ) return;
data_location |= DL_CPU;
cuda_vtx_data.from_cuda();
for ( int idx=0; idx<point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
CUDA_Vtx_Data* const g = &cuda_vtx_data[idx];
vec_sets(p->pos,g->pos);
vec_sets(p->vel,g->vel);
vec_sets(p->surface_normal,g->surface_normal);
}
}
void
Balloon::cpu_data_to_glp()
{
if ( data_location & DL_GLP ) return;
data_location |= DL_GLP;
for ( int idx=0; idx<point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
GLP_Vtx_Data* const g = &glp_vtx_data[idx];
g->pos = p->pos;
g->vel = p->vel;
g->surface_normal = p->surface_normal;
}
for ( int idx=0; idx<tri_count; idx++ )
{
Balloon_Triangle* const tri = &triangles[idx];
GLP_Tri_Strc* const td = &glp_tri_strc[idx];
td->length_relaxed = tri->length_relaxed;
}
glp_vtx_data.to_gpu();
glp_vtx_strc.to_gpu();
glp_tri_strc.to_gpu();
glBindBuffer(GL_ARRAY_BUFFER,0);
}
bool
Balloon::cpu_data_to_cuda()
{
if ( data_location & DL_CUDA ) return false;
data_location |= DL_CUDA;
for ( int idx=0; idx<point_count; idx++ )
{
Balloon_Vertex* const p = &points[idx];
CUDA_Vtx_Data* const g = &cuda_vtx_data[idx];
vec_set(g->pos,p->pos);
vec_set(g->vel,p->vel);
}
for ( int idx=0; idx<tri_count; idx++ )
{
Balloon_Triangle* const tri = &triangles[idx];
CUDA_Tri_Strc* const tc = &cuda_tri_strc[idx];
tc->length_relaxed = tri->length_relaxed;
}
for ( int idx=1; idx<cuda_tower_volumes.elements; idx++ )
cuda_tower_volumes[idx] = 0;
cuda_tower_volumes[0] = volume;
cuda_tower_volumes.set_primary();
cuda_tower_volumes.to_cuda();
cuda_tower_volumes.swap();
cuda_tower_volumes.to_cuda();
cuda_tower_volumes.set_primary();
cuda_tri_strc.to_cuda(); // OPT: Only send if dirty.
cuda_vtx_data.to_cuda();
cuda_vtx_strc.to_cuda(); // OPT: Only send if dirty.
return true;
}
// Display a tetrahedron, used to indicate light position.
//
void
insert_tetrahedron(pCoor& loc, float size)
{
pCoor v0(loc.x,loc.y,loc.z);
pCoor v1(loc.x,loc.y-size,loc.z+size);
pCoor v2(loc.x-.866*size,loc.y-size,loc.z-0.5*size);
pCoor v3(loc.x+.866*size,loc.y-size,loc.z-0.5*size);
static pColor c1(0xffffff);
static pColor c2(0xff00);
glDisable(GL_LIGHTING);
#define TRI(va,vb,vc) \
{ \
pVect n = cross(va,vb,vc); \
glNormal3fv(n); \
glColor3fv(c1); glVertex3fv(va); \
glColor3fv(c2); glVertex3fv(vb); \
glVertex3fv(vc); \
}
glBegin(GL_TRIANGLES);
TRI(v0,v1,v2); TRI(v0,v2,v3); TRI(v0,v3,v1);
glEnd();
# undef TRI
glEnable(GL_LIGHTING);
}
void
tube_tapered(pCoor base, float radius, pVect to_apex)
{
const int sides = 10;
const double delta_theta = 2 * M_PI / sides;
const double base_radius = 1;
const double apex_radius = 0.1;
const double apex_height = 1;
const double alpha = atan2(apex_height,base_radius-apex_radius);
const double vec_z = sin(alpha);
const float to_height = to_apex.mag();
glMatrixMode(GL_MODELVIEW);
glPushMatrix();
pVect from_apex(0,0,1);
pVect rn(from_apex,to_apex);
const float rot_angle = pangle(from_apex,to_apex);
glTranslatef(base.x,base.y,base.z);
glRotatef(rot_angle * 180.0 / M_PI,rn.x,rn.y,rn.z);
glScalef(radius,radius,to_height);
glBegin(GL_QUAD_STRIP);
for ( int i=0; i<=sides; i++ )
{
const double theta = delta_theta * i;
const double cos_t = cos(theta);
const double sin_t = sin(theta);
glNormal3f( cos_t, sin_t, vec_z );
glVertex3f( apex_radius * cos_t, apex_radius * sin_t, apex_height);
glVertex3f( base_radius * cos_t, base_radius * sin_t, 0);
}
glEnd();
glPopMatrix();
}
void
World::render()
{
cb_keyboard();
frame_timer.frame_start();
if ( opt_physics_method == GP_cpu ) balloon.gpu_data_to_cpu();
if ( world_time == 0 ) world_time = time_wall_fp();
if ( opt_pause )
{
world_time = time_wall_fp();
}
else
{
// Advance simulated time.
//
const double time_start = time_wall_fp();
const double sim_time_needed = time_start - world_time;
delta_t = 1.0 / ( 30 * ( opt_physics_method ? 40 : 20 ) ) ;
const int time_steps_needed = int( sim_time_needed / delta_t );
const int time_steps = min(time_steps_needed,100);
balloon.update_for_config();
if ( opt_physics_method &&
( balloon.need_cpu_iteration || balloon.opt_cpu_interleave ) )
balloon.time_step_cpu_once();
if ( opt_physics_method ) balloon.time_step_gpu(time_steps);
else balloon.time_step_cpu(time_steps);
frame_timer.work_amt_set(time_steps);
world_time += delta_t * time_steps;
}
// Rescue balloon if it is sinking into the abyss.
//
if ( balloon.centroid.y < -50 )
{
pVect rescue_vector = pCoor(0,12,-12) - balloon.centroid;
balloon.translate(rescue_vector);
}
const pColor white(0xffffff);
const pColor gray(0x303030);
const pColor lsu_business_purple(0x7f5ca2);
const pColor lsu_spirit_purple(0x580da6);
const pColor lsu_spirit_gold(0xf9b237);
const pColor lsu_official_purple(0x2f0462);
const pColor dark(0);
const int win_width = ogl_helper.get_width();
const int win_height = ogl_helper.get_height();
const float aspect = float(win_width) / win_height;
glMatrixMode(GL_MODELVIEW);
glLoadTransposeMatrixf(modelview);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
// Frustum: left, right, bottom, top, near, far
glFrustum(-.8,.8,-.8/aspect,.8/aspect,1,5000);
glViewport(0, 0, win_width, win_height);
pError_Check();
glClearColor(0,0,0,0.5);
glClearDepth(1.0);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT );
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LESS);
glDisable(GL_BLEND);
glLightModeli(GL_LIGHT_MODEL_TWO_SIDE,1);
glLightfv(GL_LIGHT0, GL_POSITION, light_location);
glLightf(GL_LIGHT0, GL_CONSTANT_ATTENUATION, 0.3);
glLightf(GL_LIGHT0, GL_LINEAR_ATTENUATION, 1.0);
glLightf(GL_LIGHT0, GL_QUADRATIC_ATTENUATION, 0);
pColor ambient_color(0x999999);
glLightModelfv(GL_LIGHT_MODEL_AMBIENT, ambient_color);
glLightfv(GL_LIGHT0, GL_DIFFUSE, white * opt_light_intensity);
glLightfv(GL_LIGHT0, GL_AMBIENT, dark);
glLightfv(GL_LIGHT0, GL_SPECULAR, white * opt_light_intensity);
glEnable(GL_LIGHT0);
glEnable(GL_LIGHTING);
glEnable(GL_COLOR_MATERIAL);
glColorMaterial(GL_FRONT_AND_BACK,GL_AMBIENT_AND_DIFFUSE);
glShadeModel(GL_SMOOTH);
pColor color_ball(0x666666);
pColor scolor_ball(0x111111);
const float shininess_ball = 5;
// Common to all textures.
//
glActiveTexture(GL_TEXTURE0);
glEnable(GL_TEXTURE_2D);
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,
GL_LINEAR_MIPMAP_LINEAR);
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);
glTexEnvi(GL_TEXTURE_ENV,GL_TEXTURE_ENV_MODE,GL_MODULATE);
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT);
glTexParameterf(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT);
glEnable(GL_RESCALE_NORMAL);
glEnable(GL_NORMALIZE);
ogl_helper.fbprintf("%s\n",frame_timer.frame_rate_text_get());
ogl_helper.fbprintf
("Physics Computation: %s ('a' to change) "
" + %d cpu timestep / frame ('x' to change)\n",
gpu_physics_method_str[opt_physics_method],
balloon.opt_cpu_interleave);
ogl_helper.fbprintf
("Eye location: [%5.1f, %5.1f, %5.1f] "
"Eye direction: [%+.2f, %+.2f, %+.2f]\n",
eye_location.x, eye_location.y, eye_location.z,
eye_direction.x, eye_direction.y, eye_direction.z);
pCoor cent = balloon.centroid;
pCoor vel = balloon.velocity_avg();
ogl_helper.fbprintf
("Centroid [%5.1f,%5.1f,%5.1f] Vel [%+5.1f,%+5.1f,%+5.1f] "
"Gas Amt %.2f Volume %.2f Pressure %.2f\n",
cent.x,cent.y,cent.z,
vel.x,vel.y,vel.z,
balloon.gas_amount,
balloon.volume / balloon.nom_volume,
balloon.pressure
);
ogl_helper.fbprintf
("Weight (Surf+Gas-Displ Air=W) (%6.2f + %6.2f - %6.2f = %6.2f)\n",
balloon.opt_gravity_accel * balloon.surface_mass,
balloon.opt_gravity_accel * balloon.volume * balloon.density_gas,
balloon.opt_gravity_accel * balloon.volume * balloon.density_air,
balloon.opt_gravity_accel * balloon.surface_mass
+ balloon.opt_gravity_accel * balloon.volume * balloon.density_gas
- balloon.opt_gravity_accel * balloon.volume * balloon.density_air
);
ogl_helper.fbprintf("Oversample %3.1f\n", balloon.oversample);
pVariable_Control_Elt* const cvar = variable_control.current;
ogl_helper.fbprintf("VAR %s = %.5f (TAB or '`' to change, +/- to adjust)\n",
cvar->name,cvar->var[0]);
const int half_elements = platform_tile_coords.elements >> 3 << 2;
const int vstride = sizeof(Balloon_Vertex);
if ( opt_surface_smooth )
{
//
// Render balloon reflection. (Will be blended with dark tiles.)
//
// Write stencil at location of dark (mirrored) tiles.
//
glDisable(GL_LIGHTING);
glEnable(GL_STENCIL_TEST);
glStencilFunc(GL_NEVER,2,2);
glStencilOp(GL_REPLACE,GL_KEEP,GL_KEEP);
platform_tile_coords.bind();
glVertexPointer(3, GL_FLOAT, sizeof(platform_tile_coords.data[0]), 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_QUADS,half_elements+4,half_elements-4);
glEnable(GL_LIGHTING);
// Prepare to write only stenciled locations.
//
glStencilFunc(GL_EQUAL,2,2);
glStencilOp(GL_KEEP,GL_KEEP,GL_KEEP);
// Use a transform that reflects objects to other size of platform.
//
glMatrixMode(GL_PROJECTION);
glPushMatrix();
glMultTransposeMatrixf(transform_mirror);
// Reflected front face should still be treated as the front face.
//
glFrontFace(GL_CW);
glColor3fv(color_ball);
glMaterialfv(GL_FRONT_AND_BACK,GL_SPECULAR,scolor_ball);
glMaterialf(GL_FRONT_AND_BACK,GL_SHININESS,shininess_ball);
glBindTexture(GL_TEXTURE_2D,balloon.texid_pse);
balloon.tex_coords.bind();
glTexCoordPointer(2,GL_FLOAT,0,NULL);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
if ( balloon.data_bid )
{
glBindBuffer(GL_ARRAY_BUFFER,balloon.data_bid);
glVertexPointer
(3, GL_FLOAT, balloon.data_stride, (void*)( 2 * sizeof(pCoor) ));
glNormalPointer(GL_FLOAT, balloon.data_stride, NULL );
}
else
{
glBindBuffer(GL_ARRAY_BUFFER,0);
glVertexPointer(4, GL_FLOAT, vstride, &balloon.points[0].pos);
glNormalPointer(GL_FLOAT, vstride, &balloon.points[0].surface_normal);
}
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_NORMAL_ARRAY);
balloon.point_indices.bind(GL_ELEMENT_ARRAY_BUFFER);
glDrawElements
(GL_TRIANGLES,balloon.point_indices.elements,GL_UNSIGNED_INT, NULL);
glDisableClientState(GL_NORMAL_ARRAY);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER,0);
glFrontFace(GL_CCW);
glPopMatrix();
glDisable(GL_STENCIL_TEST);
}
{
//
// Write framebuffer stencil with balloon's shadow.
//
// Use transform that maps vertices to platform surface.
//
glMatrixMode(GL_MODELVIEW);
glPushMatrix();
glLoadTransposeMatrixf(modelview_shadow);
glDisable(GL_LIGHTING);
glDisable(GL_TEXTURE_2D);
glEnable(GL_STENCIL_TEST);
glStencilFunc(GL_NEVER,1,-1); // ref, mask
glStencilOp(GL_REPLACE,GL_KEEP,GL_KEEP); // sfail, dfail, dpass
if ( balloon.data_bid )
{
glBindBuffer(GL_ARRAY_BUFFER,balloon.data_bid);
glVertexPointer
(3, GL_FLOAT, balloon.data_stride, (void*)( 2 * sizeof(pCoor) ));
}
else
glVertexPointer(4, GL_FLOAT, vstride, &balloon.points[0].pos);
glEnableClientState(GL_VERTEX_ARRAY);
balloon.point_indices.bind(GL_ELEMENT_ARRAY_BUFFER);
glDrawElements
(GL_TRIANGLES,balloon.point_indices.elements,GL_UNSIGNED_INT, NULL);
glDisableClientState(GL_VERTEX_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER,0);
glEnable(GL_LIGHTING);
glDisable(GL_STENCIL_TEST);
glPopMatrix();
}
// Setup texture for platform.
//
glBindTexture(GL_TEXTURE_2D,balloon.texid_syl);
// Blend dark tiles with existing balloon reflection.
//
glEnable(GL_STENCIL_TEST);
glBlendEquation(GL_FUNC_ADD);
glBlendFunc(GL_CONSTANT_ALPHA,GL_ONE_MINUS_CONSTANT_ALPHA); // src, dst
glBlendColor(0,0,0,0.5);
glDepthFunc(GL_ALWAYS);
glNormal3f(0,1,0);
if ( opt_surface_smooth )
{
glEnable(GL_TEXTURE_2D);
platform_tex_coords.bind();
glTexCoordPointer(2, GL_FLOAT,2*sizeof(float), 0);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
}
platform_tile_coords.bind();
glVertexPointer
(3, GL_FLOAT,sizeof(platform_tile_coords.data[0]), 0);
glEnableClientState(GL_VERTEX_ARRAY);
for ( int pass = 0; pass < 2; pass++ )
{
if ( pass == 0 )
{
// Prepare to write unshadowed parts of frame buffer.
//
glStencilFunc(GL_NOTEQUAL,1,1);
}
else
{
// Prepare to write shadowed parts of frame buffer.
//
glStencilFunc(GL_EQUAL,1,1);
glLightf(GL_LIGHT0, GL_LINEAR_ATTENUATION, 6.0);
}
if ( opt_surface_smooth ) glEnable(GL_TEXTURE_2D);
// Write lighter-colored, textured tiles.
//
glMaterialfv(GL_FRONT_AND_BACK,GL_SPECULAR,gray);
glMaterialf(GL_FRONT_AND_BACK,GL_SHININESS,2.0);
glColor3f(0.35,0.35,0.35);
glDrawArrays(GL_QUADS,0,half_elements+4);
// Write darker-colored, untextured, mirror tiles.
//
glEnable(GL_BLEND);
glMaterialfv(GL_FRONT_AND_BACK,GL_SPECULAR,white);
glMaterialf(GL_FRONT_AND_BACK,GL_SHININESS,20);
glDisable(GL_TEXTURE_2D);
glColor3fv(lsu_spirit_purple);
glDrawArrays(GL_QUADS,half_elements+4,half_elements-4);
glDisable(GL_BLEND);
}
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glDisableClientState(GL_VERTEX_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER,0);
glDepthFunc(GL_LESS);
glDisable(GL_TEXTURE_2D);
glDisable(GL_STENCIL_TEST);
glLightf(GL_LIGHT0, GL_LINEAR_ATTENUATION, 1.0);
glMaterialf(GL_BACK,GL_SHININESS,shininess_ball);
//
// Render Balloon
//
if ( opt_surface_smooth )
{
// With Textures
const int vstride = sizeof(Balloon_Vertex);
glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D,balloon.texid_pse);
glColor3fv(color_ball);
glMaterialfv(GL_BACK,GL_SPECULAR,scolor_ball);
glColorMaterial(GL_BACK,GL_AMBIENT_AND_DIFFUSE);
pColor color_red(0.9,0.2,0.2);
glMaterialfv(GL_FRONT,GL_AMBIENT_AND_DIFFUSE,color_red);
glMaterialfv(GL_FRONT,GL_SPECULAR,dark);
balloon.tex_coords.bind();
glTexCoordPointer(2,GL_FLOAT,0,NULL);
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
if ( balloon.data_bid )
{
glBindBuffer(GL_ARRAY_BUFFER,balloon.data_bid);
glVertexPointer
(3, GL_FLOAT, balloon.data_stride, (void*)( 2 * sizeof(pCoor) ));
glNormalPointer(GL_FLOAT, balloon.data_stride, NULL );
}
else
{
glBindBuffer(GL_ARRAY_BUFFER,0);
glVertexPointer(4, GL_FLOAT, vstride, &balloon.points[0].pos);
glNormalPointer(GL_FLOAT, vstride, &balloon.points[0].surface_normal);
}
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_NORMAL_ARRAY);
balloon.point_indices.bind(GL_ELEMENT_ARRAY_BUFFER);
glDrawElements
(GL_TRIANGLES,balloon.point_indices.elements,GL_UNSIGNED_INT, NULL);
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_NORMAL_ARRAY);
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
glDisable(GL_TEXTURE_2D);
glBindBuffer(GL_ARRAY_BUFFER,0);
}
else
{
// With Colored Stripes
balloon.gpu_data_to_cpu();
glMaterialfv(GL_BACK,GL_SPECULAR,scolor_ball);
glBegin(GL_TRIANGLES);
for ( int idx = 0; idx < balloon.tri_count; idx++ )
{
Balloon_Triangle* const tri = &balloon.triangles[idx];
Balloon_Vertex* const p = &balloon.points[tri->pi];
Balloon_Vertex* const q = &balloon.points[tri->qi];
Balloon_Vertex* const r = &balloon.points[tri->ri];
glColor3fv(tri->color);
glMaterialfv(GL_FRONT_AND_BACK,GL_SPECULAR,tri->color);
const bool true_normal = false;
if ( !true_normal )
{
pVect norm(q->pos,p->pos,r->pos);
norm.normalize();
glNormal3fv(norm);
}
if ( true_normal ) glNormal3fv(p->surface_normal);
glVertex4fv(p->pos);
if ( true_normal ) glNormal3fv(q->surface_normal);
glVertex4fv(q->pos);
if ( true_normal ) glNormal3fv(r->surface_normal);
glVertex4fv(r->pos);
}
glEnd();
}
insert_tetrahedron(light_location,0.05);
pError_Check();
glColor3f(0,1,0); // This sets the text color. Don't know why.
frame_timer.frame_end();
glutSwapBuffers();
}
void
World::cb_keyboard()
{
if ( !ogl_helper.keyboard_key ) return;
pVect adjustment(0,0,0);
pVect user_rot_axis(0,0,0);
const float move_amt = 0.4;
balloon.gpu_data_to_cpu();
balloon.data_location = DL_CPU;
balloon.data_bid = 0;
balloon.cuda_constants_stale = true;
switch ( ogl_helper.keyboard_key ) {
case FB_KEY_LEFT: adjustment.x = -move_amt; break;
case FB_KEY_RIGHT: adjustment.x = move_amt; break;
case FB_KEY_PAGE_UP: adjustment.y = move_amt; break;
case FB_KEY_PAGE_DOWN: adjustment.y = -move_amt; break;
case FB_KEY_DOWN: adjustment.z = move_amt; break;
case FB_KEY_UP: adjustment.z = -move_amt; break;
case FB_KEY_DELETE: user_rot_axis.y = 1; break;
case FB_KEY_INSERT: user_rot_axis.y = -1; break;
case FB_KEY_HOME: user_rot_axis.x = 1; break;
case FB_KEY_END: user_rot_axis.x = -1; break;
case 'A':
if ( opt_physics_method )
{ opt_physics_method_last = opt_physics_method;
opt_physics_method = GP_cpu; }
else
{ opt_physics_method = opt_physics_method_last; }
break;
case 'a':
opt_physics_method++;
if ( opt_physics_method == GP_ENUM_SIZE ) opt_physics_method = 0;
break;
case 'b': opt_move_item = MI_Balloon; break;
case 'B': opt_move_item = MI_Balloon_V; break;
case 'd': case 'D': balloon.opt_damping = !balloon.opt_damping; break;
case 'e': case 'E': opt_move_item = MI_Eye; break;
case 'g': case 'G': balloon.opt_gravity = !balloon.opt_gravity; break;
case 'l': case 'L': opt_move_item = MI_Light; break;
case 'n': case 'N': opt_surface_smooth = !opt_surface_smooth; break;
case 'p': case 'P': opt_pause = !opt_pause; break;
case 'r': case 'R': balloon.length_relaxed_update = true; break;
case 's': balloon.stop(); break;
case 'S': balloon.freeze(); break;
case 'x': balloon.opt_cpu_interleave = !balloon.opt_cpu_interleave; break;
case 9: variable_control.switch_var_right(); break;
case 96: variable_control.switch_var_left(); break; // `, until S-TAB works.
case '-':case '_': variable_control.adjust_lower(); break;
case '+':case '=': variable_control.adjust_higher(); break;
default: printf("Unknown key, %d\n",ogl_helper.keyboard_key); break;
}
// Update eye_direction based on keyboard command.
//
if ( user_rot_axis.x || user_rot_axis.y )
{
pMatrix_Rotation rotall(eye_direction,pVect(0,0,-1));
user_rot_axis *= invert(rotall);
eye_direction *= pMatrix_Rotation(user_rot_axis, M_PI * 0.03);
modelview_update();
}
// Update eye_location based on keyboard command.
//
if ( adjustment.x || adjustment.y || adjustment.z )
{
// pMatrix_Rotation rotall(eye_direction,pVect(0,0,-1));
const double angle =
fabs(eye_direction.y) > 0.99
? 0 : atan2(eye_direction.x,-eye_direction.z);
pMatrix_Rotation rotall(pVect(0,1,0),-angle);
adjustment *= rotall;
switch ( opt_move_item ){
case MI_Balloon: balloon.translate(adjustment); break;
case MI_Balloon_V: balloon.push(adjustment); break;
case MI_Light: light_location += adjustment; break;
case MI_Eye: eye_location += adjustment; break;
default: break;
}
modelview_update();
}
}
int
main(int argv, char **argc)
{
pOpenGL_Helper popengl_helper(argv,argc);
World world(popengl_helper);
popengl_helper.rate_set(30);
popengl_helper.display_cb_set(world.render_w,&world);
}