#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "H:cuda_by_examplecommonook.h"
#include "H:cuda_by_examplecommoncpu_bitmap.h"
#include "device_functions.h"
// NOTE(review): the header name that followed this `#include` was lost in the
// original text. <stdio.h> and <stdlib.h> are restored here because the file
// calls printf, rand, malloc and free -- confirm against the original listing.
#include <stdio.h>
#include <stdlib.h>

// Width and height of the rendered image, in pixels.
#define DIM 1024
// Pseudo-random value in [0, x]. Pass a float so the division by RAND_MAX is
// not an integer division. The argument is parenthesized so that expressions
// such as rnd( a + b ) scale the whole argument.
#define rnd( x ) ((x) * rand() / RAND_MAX)
// Magnitude of the "no hit" depth sentinel (Sphere::hit returns -INF on a miss).
#define INF 2e10f
// Data structure that models a sphere.
// Sphere primitive used by the ray tracer: a colour, a radius and a centre.
struct Sphere {
    float   r, b, g;    // colour components
    float   radius;
    float   x, y, z;    // centre of the sphere

    // hit: decides whether the ray shot along the z axis from pixel (ox, oy)
    // intersects this sphere.
    //
    //   ox, oy  pixel coordinates of the ray, in the same frame as x and y
    //   n       out-parameter; on a hit receives dz / radius, which the kernel
    //           uses as a shading factor. Left untouched on a miss.
    //
    // Returns the z coordinate of the intersection point (dz + z) on a hit,
    // or -INF when the ray misses the sphere.
    __device__ float hit( float ox, float oy, float *n ) {
        float dx = ox - x;
        float dy = oy - y;
        if (dx*dx + dy*dy < radius*radius) {
            float dz = sqrtf( radius*radius - dx*dx - dy*dy );
            *n = dz / sqrtf( radius * radius );
            return dz + z;
        }
        return -INF;
    }
};
// Number of spheres in the scene.
#define SPHERES 30

// Ray-tracing kernel: one thread shades one pixel.
//
//   s    device array of SPHERES spheres
//   ptr  device output image, 4 bytes (R, G, B, A) per pixel
//
// Launch layout: a 2D grid of 2D blocks. The row pitch of the image is taken
// from blockDim.x * gridDim.x and there is no bounds check, so ptr must hold
// 4 bytes for every launched thread.
__global__ void kernel( Sphere *s, unsigned char *ptr ) {
    // map threadIdx/blockIdx to a pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;
    // shift the image coordinates by DIM/2 so that the z axis passes through
    // the centre of the image
    float   ox = (x - DIM/2);
    float   oy = (y - DIM/2);

    // start from a black background
    float   r=0, g=0, b=0;
    float   maxz = -INF;
    // test the ray against every sphere and keep the hit with the largest z
    for (int i=0; i<SPHERES; i++) {
        float   n;
        float   t = s[i].hit( ox, oy, &n );
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            // remember the depth of this hit
            maxz = t;
        }
    }

    // all spheres tested: store the resulting colour in the output image
    ptr[offset*4 + 0] = (int)(r * 255);
    ptr[offset*4 + 1] = (int)(g * 255);
    ptr[offset*4 + 2] = (int)(b * 255);
    ptr[offset*4 + 3] = 255;
}
// globals needed by the update routine
// Bundle of device pointers handed to CPUBitmap as its user-data pointer.
struct DataBlock {
    unsigned char   *dev_bitmap;
    Sphere          *s;
};

// Renders SPHERES random spheres on the GPU (sphere data in global memory),
// prints the elapsed GPU time and displays the image.
int main( void ) {
    DataBlock   data;
    // record the start time
    cudaEvent_t     start, stop;
    HANDLE_ERROR( cudaEventCreate( &start ) );
    HANDLE_ERROR( cudaEventCreate( &stop ) );
    HANDLE_ERROR( cudaEventRecord( start, 0 ) );

    CPUBitmap bitmap( DIM, DIM, &data );
    unsigned char   *dev_bitmap;
    Sphere          *s;

    // allocate memory on the GPU for the output bitmap
    HANDLE_ERROR( cudaMalloc( (void**)&dev_bitmap,
                              bitmap.image_size() ) );
    // allocate memory for the Sphere dataset
    HANDLE_ERROR( cudaMalloc( (void**)&s,
                              sizeof(Sphere) * SPHERES ) );

    // allocate temp memory, initialize it, copy to
    // memory on the GPU, then free our temp memory
    Sphere *temp_s = (Sphere*)malloc( sizeof(Sphere) * SPHERES );
    if (temp_s == NULL) {
        fprintf( stderr, "Out of host memory for the sphere data\n" );
        return 1;
    }
    // generate a random colour, centre and radius for every sphere
    // NOTE(review): the loop body, the upload and the launch configuration
    // were missing from the original text; the values below follow the usual
    // form of this example -- confirm against the original listing.
    for (int i=0; i<SPHERES; i++) {
        temp_s[i].r = rnd( 1.0f );
        temp_s[i].g = rnd( 1.0f );
        temp_s[i].b = rnd( 1.0f );
        temp_s[i].x = rnd( 1000.0f ) - 500;
        temp_s[i].y = rnd( 1000.0f ) - 500;
        temp_s[i].z = rnd( 1000.0f ) - 500;
        temp_s[i].radius = rnd( 100.0f ) + 20;
    }
    HANDLE_ERROR( cudaMemcpy( s, temp_s,
                              sizeof(Sphere) * SPHERES,
                              cudaMemcpyHostToDevice ) );
    free( temp_s );

    // generate a bitmap from our sphere data: one thread per pixel, so
    // grid * block must cover exactly DIM x DIM (the kernel has no bounds check)
    dim3    grids(DIM/16,DIM/16);
    dim3    threads(16,16);
    kernel<<<grids,threads>>>( s, dev_bitmap );
    // a kernel launch returns no status; pick up launch errors explicitly
    HANDLE_ERROR( cudaGetLastError() );

    // copy our bitmap back from the GPU for display
    HANDLE_ERROR( cudaMemcpy( bitmap.get_ptr(), dev_bitmap,
                              bitmap.image_size(),
                              cudaMemcpyDeviceToHost ) );

    // get stop time, and display the timing results
    HANDLE_ERROR( cudaEventRecord( stop, 0 ) );
    // wait for the stop event to complete before reading the elapsed time
    HANDLE_ERROR( cudaEventSynchronize( stop ) );
    float   elapsedTime;
    HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime,
                                        start, stop ) );
    printf( "Time to generate:  %3.1f ms\n", elapsedTime );

    HANDLE_ERROR( cudaEventDestroy( start ) );
    HANDLE_ERROR( cudaEventDestroy( stop ) );

    HANDLE_ERROR( cudaFree( dev_bitmap ) );
    HANDLE_ERROR( cudaFree( s ) );

    // display
    bitmap.display_and_exit();
}
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "H:cuda_by_examplecommonook.h"
#include "H:cuda_by_examplecommoncpu_bitmap.h"
#include "device_functions.h"
// Width and height of the rendered image, in pixels.
#define DIM 1024
// Pseudo-random value in [0, x]. Pass a float so the division by RAND_MAX is
// not an integer division. The argument is parenthesized so that expressions
// such as rnd( a + b ) scale the whole argument.
#define rnd( x ) ((x) * rand() / RAND_MAX)
// Magnitude of the "no hit" depth sentinel (Sphere::hit returns -INF on a miss).
#define INF 2e10f

// Sphere primitive used by the ray tracer: a colour, a radius and a centre.
struct Sphere {
    float   r, b, g;    // colour components
    float   radius;
    float   x, y, z;    // centre of the sphere

    // Decides whether the ray shot along the z axis from pixel (ox, oy)
    // intersects this sphere.
    //
    //   ox, oy  pixel coordinates of the ray, in the same frame as x and y
    //   n       out-parameter; on a hit receives dz / radius, which the kernel
    //           uses as a shading factor. Left untouched on a miss.
    //
    // Returns the z coordinate of the intersection point (dz + z) on a hit,
    // or -INF when the ray misses the sphere.
    __device__ float hit( float ox, float oy, float *n ) {
        float dx = ox - x;
        float dy = oy - y;
        if (dx*dx + dy*dy < radius*radius) {
            float dz = sqrtf( radius*radius - dx*dx - dy*dy );
            *n = dz / sqrtf( radius * radius );
            return dz + z;
        }
        return -INF;
    }
};
// Number of spheres in the scene.
#define SPHERES 30

// Scene description, kept in constant memory and filled from the host with
// cudaMemcpyToSymbol before the kernel is launched.
__constant__ Sphere s[SPHERES];

// Ray-tracing kernel: one thread shades one pixel, reading the spheres from
// the __constant__ array s.
//
//   ptr  device output image, 4 bytes (R, G, B, A) per pixel
//
// Launch layout: a 2D grid of 2D blocks. The row pitch of the image is taken
// from blockDim.x * gridDim.x and there is no bounds check, so ptr must hold
// 4 bytes for every launched thread.
__global__ void kernel( unsigned char *ptr ) {
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;
    // shift by DIM/2 so that the z axis passes through the image centre
    float   ox = (x - DIM/2);
    float   oy = (y - DIM/2);

    // start from a black background
    float   r=0, g=0, b=0;
    float   maxz = -INF;
    // test the ray against every sphere and keep the hit with the largest z
    for (int i=0; i<SPHERES; i++) {
        float   n;
        float   t = s[i].hit( ox, oy, &n );
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    // store the resulting colour in the output image
    ptr[offset*4 + 0] = (int)(r * 255);
    ptr[offset*4 + 1] = (int)(g * 255);
    ptr[offset*4 + 2] = (int)(b * 255);
    ptr[offset*4 + 3] = 255;
}

// Device pointer handed to CPUBitmap as its user-data pointer.
struct DataBlock {
    unsigned char   *dev_bitmap;
};

// Renders SPHERES random spheres on the GPU (sphere data in constant memory),
// prints the elapsed GPU time and displays the image.
int main() {
    DataBlock   data;
    // capture the start time
    cudaEvent_t     start, stop;
    HANDLE_ERROR( cudaEventCreate( &start ) );
    HANDLE_ERROR( cudaEventCreate( &stop ) );
    HANDLE_ERROR( cudaEventRecord( start, 0 ) );

    CPUBitmap bitmap( DIM, DIM, &data );
    unsigned char   *dev_bitmap;

    // allocate the memory on the GPU for the output bitmap
    HANDLE_ERROR( cudaMalloc( (void**)&dev_bitmap,
                              bitmap.image_size() ) );

    // build the scene on the host, upload it, then free the host copy
    Sphere *temp_s = (Sphere*)malloc( sizeof(Sphere) * SPHERES );
    if (temp_s == NULL) {
        fprintf( stderr, "Out of host memory for the sphere data\n" );
        return 1;
    }
    // NOTE(review): the loop body, the upload and the launch configuration
    // were missing from the original text; the values below follow the usual
    // form of this example -- confirm against the original listing.
    for (int i=0; i<SPHERES; i++) {
        temp_s[i].r = rnd( 1.0f );
        temp_s[i].g = rnd( 1.0f );
        temp_s[i].b = rnd( 1.0f );
        temp_s[i].x = rnd( 1000.0f ) - 500;
        temp_s[i].y = rnd( 1000.0f ) - 500;
        temp_s[i].z = rnd( 1000.0f ) - 500;
        temp_s[i].radius = rnd( 100.0f ) + 20;
    }
    // s is a __constant__ symbol, so it is written with cudaMemcpyToSymbol
    // rather than cudaMemcpy
    HANDLE_ERROR( cudaMemcpyToSymbol( s, temp_s,
                                      sizeof(Sphere) * SPHERES ) );
    free( temp_s );

    // generate a bitmap from our sphere data: one thread per pixel, so
    // grid * block must cover exactly DIM x DIM (the kernel has no bounds check)
    dim3    grids(DIM/16,DIM/16);
    dim3    threads(16,16);
    kernel<<<grids,threads>>>( dev_bitmap );
    // a kernel launch returns no status; pick up launch errors explicitly
    HANDLE_ERROR( cudaGetLastError() );

    // copy the bitmap back from GPU to CPU for display
    HANDLE_ERROR( cudaMemcpy( bitmap.get_ptr(), dev_bitmap,
                              bitmap.image_size(),
                              cudaMemcpyDeviceToHost ) );

    // stop the timer
    HANDLE_ERROR( cudaEventRecord( stop, 0 ) );
    // wait for the stop event to complete before reading the elapsed time
    HANDLE_ERROR( cudaEventSynchronize( stop ) );
    float   elapsedTime;
    HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime,
                                        start, stop ) );
    printf( "Time to generate:  %3.1f ms\n", elapsedTime );

    HANDLE_ERROR( cudaEventDestroy( start ) );
    HANDLE_ERROR( cudaEventDestroy( stop ) );

    HANDLE_ERROR( cudaFree( dev_bitmap ) );

    // display
    bitmap.display_and_exit();
}
