-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutil_cpu.h
More file actions
129 lines (99 loc) · 3.4 KB
/
util_cpu.h
File metadata and controls
129 lines (99 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include "cuda.h"
#include "gpu_ptr.h"
#include "kernel_arg_structs.h"
#include <sys/time.h>
// Print basic properties of GPU device 0 to stdout.
//
// Prints the number of CUDA devices, then selects device 0 with cudaSetDevice
// and prints its compute capability, name and concurrentKernels flag.
// Every CUDA runtime call is checked: on failure (or when no device is
// reported) a diagnostic goes to stderr and the function returns early, so
// fields of an unfilled cudaDeviceProp are never printed.
void print_properties(){
    int deviceCount = 0;
    cudaError_t err = cudaGetDeviceCount(&deviceCount);
    if (err != cudaSuccess){
        fprintf(stderr, "print_properties: cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return;
    }
    printf("Device count: %d\n", deviceCount);
    if (deviceCount < 1){
        // Device 0 does not exist, so there is nothing to select or query.
        fprintf(stderr, "print_properties: no CUDA device available\n");
        return;
    }

    // Selecting device 0 is kept from the original: it is a side effect that
    // code running after this call may depend on.
    err = cudaSetDevice(0);
    if (err != cudaSuccess){
        fprintf(stderr, "print_properties: cudaSetDevice(0) failed: %s\n", cudaGetErrorString(err));
        return;
    }

    cudaDeviceProp p;
    err = cudaGetDeviceProperties(&p, 0);
    if (err != cudaSuccess){
        fprintf(stderr, "print_properties: cudaGetDeviceProperties failed: %s\n", cudaGetErrorString(err));
        return;
    }

    printf("Compute capability: %d.%d\n", p.major, p.minor);
    printf("Name: %s\n" , p.name);
    printf("Compute concurrency %i\n", p.concurrentKernels);
    printf("\n\n");
}
// Returns the time of day reported by gettimeofday() as seconds, with the
// microsecond part added as a fraction. Returns 0 if gettimeofday() fails.
double get_wall_time(){
    struct timeval now;
    const int status = gettimeofday(&now, NULL);
    if (status != 0){
        // gettimeofday reported an error; signal it with a zero timestamp.
        return 0;
    }
    const double whole_seconds = (double)now.tv_sec;
    const double micro_seconds = (double)now.tv_usec;
    return whole_seconds + micro_seconds * .000001;
}
// Returns the value of clock() converted to seconds by dividing it by
// CLOCKS_PER_SEC.
double get_cpu_time(){
    const clock_t ticks = clock();
    const double ticks_as_double = (double)ticks;
    return ticks_as_double / CLOCKS_PER_SEC;
}
// Fills *args with the supplied values.
//   U0..U3 -> args->U0..U3
//   NX, NY -> args->NX, args->NY
//   border -> args->global_border
// args must point to a valid collBCKernelArgs; it is dereferenced unconditionally.
inline void set_bc_args(collBCKernelArgs* args, gpu_raw_ptr U0, gpu_raw_ptr U1, gpu_raw_ptr U2, gpu_raw_ptr U3, unsigned int NX, unsigned int NY,int border){
    collBCKernelArgs& out = *args;

    // Domain size and border width.
    out.NX = NX;
    out.NY = NY;
    out.global_border = border;

    // The four U buffers.
    out.U0 = U0;
    out.U1 = U1;
    out.U2 = U2;
    out.U3 = U3;
}
// Fills *args with the supplied values.
//   dt             -> args->dt
//   U0..U3         -> args->U0..U3
//   R0..R3         -> args->R0..R3
//   Q0..Q3         -> args->Q0..Q3
//   nx, ny         -> args->nx, args->ny
//   border         -> args->global_border
// args must point to a valid RKKernelArgs; it is dereferenced unconditionally.
inline void set_rk_args(RKKernelArgs* args, float* dt, gpu_raw_ptr U0, gpu_raw_ptr U1, gpu_raw_ptr U2, gpu_raw_ptr U3, gpu_raw_ptr R0, gpu_raw_ptr R1, gpu_raw_ptr R2, gpu_raw_ptr R3, gpu_raw_ptr Q0, gpu_raw_ptr Q1, gpu_raw_ptr Q2, gpu_raw_ptr Q3, unsigned int nx,unsigned int ny, int border){
    RKKernelArgs& out = *args;

    // Scalars: time-step pointer, domain size, border width.
    out.dt = dt;
    out.nx = nx;
    out.ny = ny;
    out.global_border = border;

    // The U buffers.
    out.U0 = U0;
    out.U1 = U1;
    out.U2 = U2;
    out.U3 = U3;

    // The R buffers.
    out.R0 = R0;
    out.R1 = R1;
    out.R2 = R2;
    out.R3 = R3;

    // The Q buffers.
    out.Q0 = Q0;
    out.Q1 = Q1;
    out.Q2 = Q2;
    out.Q3 = Q3;
}
// Fills *args with the supplied values; each parameter is stored in the
// field of the same name (L, dt, nElements, dx, dy, scale).
// args must point to a valid DtKernelArgs; it is dereferenced unconditionally.
inline void set_dt_args(DtKernelArgs* args, float* L, float* dt, unsigned int nElements, float dx, float dy, float scale){
    DtKernelArgs& out = *args;

    // Pointer arguments and the element count.
    out.L = L;
    out.dt = dt;
    out.nElements = nElements;

    // Floating-point parameters.
    out.dx = dx;
    out.dy = dy;
    out.scale = scale;
}
// Fills *args with the supplied values.
//   L                      -> args->L
//   U0..U3, R0..R3         -> args->U0..U3, args->R0..R3
//   nx, ny                 -> args->nx, args->ny
//   border                 -> args->global_border
//   dx, dy, theta, gamma   -> fields of the same name
//   innerDimX, innerDimY   -> fields of the same name
// args must point to a valid FluxKernelArgs; it is dereferenced unconditionally.
inline void set_flux_args(FluxKernelArgs* args, float* L, gpu_raw_ptr U0, gpu_raw_ptr U1, gpu_raw_ptr U2, gpu_raw_ptr U3, gpu_raw_ptr R0, gpu_raw_ptr R1, gpu_raw_ptr R2, gpu_raw_ptr R3, unsigned int nx, unsigned int ny, int border, float dx, float dy, float theta, float gamma, int innerDimX, int innerDimY){
    FluxKernelArgs& out = *args;

    // Integer sizes.
    out.nx = nx;
    out.ny = ny;
    out.global_border = border;
    out.innerDimX = innerDimX;
    out.innerDimY = innerDimY;

    // Floating-point parameters.
    out.dx = dx;
    out.dy = dy;
    out.theta = theta;
    out.gamma = gamma;

    // Buffers.
    out.L = L;
    out.U0 = U0;
    out.U1 = U1;
    out.U2 = U2;
    out.U3 = U3;
    out.R0 = R0;
    out.R1 = R1;
    out.R2 = R2;
    out.R3 = R3;
}
// Allocates and initialises the L and dt buffers.
//
//   nElements  number of floats in the L buffers; must not be negative
//   L_host     receives a new[]-allocated host array of nElements floats,
//              every element set to FLT_MAX
//   L_device   receives a cudaMalloc'ed buffer of nElements floats holding a
//              copy of L_host
//   dt_device  receives a cudaMalloc'ed buffer holding one float (its
//              contents are not initialised here)
//
// The pointers are taken by reference so the caller actually receives the
// allocations; passed by value they would be lost and leaked. Existing calls
// that pass pointer variables compile unchanged. Any previous value of the
// three pointers is overwritten without being freed.
//
// On success the caller owns all three buffers (delete[] for L_host, cudaFree
// for L_device and dt_device). On failure a diagnostic is written to stderr,
// everything allocated here is released, and all three pointers are NULL.
void setLandDt(int nElements, float*& L_host, float*& L_device, float*& dt_device){
    L_host = NULL;
    L_device = NULL;
    dt_device = NULL;

    if (nElements < 0){
        fprintf(stderr, "setLandDt: nElements must not be negative (got %d)\n", nElements);
        return;
    }

    const size_t bytes = sizeof(float) * (size_t)nElements;

    L_host = new float[nElements];
    for (int i = 0; i < nElements; i++)
        L_host[i] = FLT_MAX;

    // Chain the CUDA calls so the first failure stops the sequence.
    cudaError_t err = cudaMalloc((void**)&L_device, bytes);
    if (err == cudaSuccess)
        err = cudaMemcpy(L_device, L_host, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
        err = cudaMalloc((void**)&dt_device, sizeof(float));

    if (err != cudaSuccess){
        fprintf(stderr, "setLandDt: CUDA error: %s\n", cudaGetErrorString(err));
        // Undo partial work so nothing leaks; cudaFree accepts a null pointer.
        cudaFree(dt_device);
        cudaFree(L_device);
        delete[] L_host;
        L_host = NULL;
        L_device = NULL;
        dt_device = NULL;
    }
}
// Computes a 2D launch configuration.
//
// gridBlock.x / gridBlock.y are set to the number of tiles of size
// tiledimX / tiledimY needed to cover NX / NY (integer division rounded up).
// threadBlock.x / threadBlock.y are set to blockdimX / blockdimY.
// The z components of both outputs are left untouched.
// tiledimX and tiledimY are used as divisors and must be non-zero.
void computeGridBlock(dim3& gridBlock, dim3& threadBlock, int NX, int NY, int tiledimX, int tiledimY, int blockdimX, int blockdimY){
    // Threads per block come straight from the caller.
    threadBlock.x = blockdimX;
    threadBlock.y = blockdimY;

    // Adding (tile - 1) before dividing rounds a partial tile up to a whole one.
    const int tilesAlongX = (NX + tiledimX - 1)/tiledimX;
    const int tilesAlongY = (NY + tiledimY - 1)/tiledimY;
    gridBlock.x = tilesAlongX;
    gridBlock.y = tilesAlongY;
}