-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHR_euler.cpp
More file actions
235 lines (167 loc) · 6.88 KB
/
HR_euler.cpp
File metadata and controls
235 lines (167 loc) · 6.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#include "cpu_ptr.h"
#include "kernel.h"
#include "ICsquare.h"
#include "util_cpu.h"
#include "configurations.h"
#include "cudaProfiler.h"
#include "cuda_profiler_api.h"
// Global variables, will be moved to h-file when ready
// Grid parameters
unsigned int nx = 400;
unsigned int ny = 400;
int border = 2;
//Time parameters
float timeLength = 0.4;
float currentTime = 0;
float dt;
float cfl_number = 0.475;
float theta = 1.3;
float gasGam = 1.4;
int step = 0;
int maxStep = 2;
int main(int argc,char **argv){
// Print GPU properties
//print_properties();
// Files to print the result after the last time step
FILE *rho_file;
FILE *E_file;
rho_file = fopen("rho_final.txt", "w");
E_file = fopen("E_final.txt", "w");
// Construct initial condition for problem
ICsinus Config(-1.0, 1.0, -1.0, 1.0);
//ICsquare Config(0.5,0.5,gasGam);
// Set initial values for Configuration 1
/*
Config.set_rho(rhoConfig19);
Config.set_pressure(pressureConfig19);
Config.set_u(uConfig19);
Config.set_v(vConfig19);
*/
// Determining global border based on left over tiles (a little hack)
int globalPadding;
globalPadding = (nx+2*border+16)/16;
globalPadding = 16*globalPadding - (nx+2*border);
//printf("Globalpad: %i\n", globalPadding);
// Change border to add padding
//border = border + globalPadding/2;
// Initiate the matrices for the unknowns in the Euler equations
cpu_ptr_2D rho(nx, ny, border,1);
cpu_ptr_2D E(nx, ny, border,1);
cpu_ptr_2D rho_u(nx, ny, border,1);
cpu_ptr_2D rho_v(nx, ny, border,1);
// Set initial condition
Config.setIC(rho, rho_u, rho_v, E);
double timeStart = get_wall_time();
// cuda error check
//printf("1 %s\n", cudaGetErrorString(cudaGetLastError()));
// Test
cpu_ptr_2D rho_dummy(nx, ny, border);
rho_dummy.xmin = -1.0;
rho_dummy.ymin = -1.0;
cpu_ptr_2D E_dummy(nx, ny, border);
E_dummy.xmin = -1.0;
E_dummy.ymin = -1.0;
//cudaProfilerStart();
//cuProfilerStart();
// Set block and grid sizes
dim3 gridBC = dim3(1, 1, 1);
dim3 blockBC = dim3(BLOCKDIM_BC,1,1);
dim3 gridBlockFlux;
dim3 threadBlockFlux;
dim3 gridBlockRK;
dim3 threadBlockRK;
computeGridBlock(gridBlockFlux, threadBlockFlux, nx + 2*border, ny + 2*border, INNERTILEDIM_X, INNERTILEDIM_Y, BLOCKDIM_X, BLOCKDIM_Y);
computeGridBlock(gridBlockRK, threadBlockRK, nx + 2*border, ny + 2*border, BLOCKDIM_X_RK, BLOCKDIM_Y_RK, BLOCKDIM_X_RK, BLOCKDIM_Y_RK);
int nElements = gridBlockFlux.x*gridBlockFlux.y;
printf("2 %s\n", cudaGetErrorString(cudaGetLastError()));
// Allocate memory for the GPU pointers
gpu_ptr_1D L_device(nElements);
gpu_ptr_1D dt_device(1);
gpu_ptr_2D rho_device(nx, ny, border);
gpu_ptr_2D E_device(nx, ny, border);
gpu_ptr_2D rho_u_device(nx, ny, border);
gpu_ptr_2D rho_v_device(nx, ny, border);
gpu_ptr_2D R0(nx, ny, border);
gpu_ptr_2D R1(nx, ny, border);
gpu_ptr_2D R2(nx, ny, border);
gpu_ptr_2D R3(nx, ny, border);
gpu_ptr_2D Q0(nx, ny, border);
gpu_ptr_2D Q1(nx, ny, border);
gpu_ptr_2D Q2(nx, ny, border);
gpu_ptr_2D Q3(nx, ny, border);
// Allocate pinned memory on host
init_allocate();
// Set BC arguments
set_bc_args(BCArgs[0], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);
set_bc_args(BCArgs[1], Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx+2*border, ny+2*border, border);
set_bc_args(BCArgs[2], rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx+2*border, ny+2*border, border);
// Set FLUX arguments
set_flux_args(fluxArgs[0], L_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);
set_flux_args(fluxArgs[1], L_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(),R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), nx, ny, border, rho.get_dx(), rho.get_dy(), theta, gasGam, INNERTILEDIM_X, INNERTILEDIM_Y);
// Set TIME argument
set_dt_args(dtArgs, L_device.getRawPtr(), dt_device.getRawPtr(), nElements, rho.get_dx(), rho.get_dy(), cfl_number);
// Set Rk arguments
set_rk_args(RKArgs[0], dt_device.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), nx, ny, border);
set_rk_args(RKArgs[1], dt_device.getRawPtr(), Q0.getRawPtr(), Q1.getRawPtr(), Q2.getRawPtr(), Q3.getRawPtr(), R0.getRawPtr(), R1.getRawPtr(), R2.getRawPtr(), R3.getRawPtr(), rho_device.getRawPtr(), rho_u_device.getRawPtr(), rho_v_device.getRawPtr(), E_device.getRawPtr(), nx, ny, border);
L_device.set(FLT_MAX);
R0.set(0,0,0,nx,ny,border);
R1.set(0,0,0,nx,ny,border);
R2.set(0,0,0,nx,ny,border);
R3.set(0,0,0,nx,ny,border);
Q0.set(0,0,0,nx,ny,border);
Q1.set(0,0,0,nx,ny,border);
Q2.set(0,0,0,nx,ny,border);
Q3.set(0,0,0,nx,ny,border);
rho_device.upload(rho.get_ptr());
rho_u_device.upload(rho_u.get_ptr());
rho_v_device.upload(rho_v.get_ptr());
E_device.upload(E.get_ptr());
// Update boudries
callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[0]);
//Create cuda stream
cudaStream_t stream1;
cudaStreamCreate(&stream1);
cudaEvent_t dt_complete;
cudaEventCreate(&dt_complete);
while (currentTime < timeLength && step < maxStep){
//RK1
//Compute flux
callFluxKernel(gridBlockFlux, threadBlockFlux, 0, fluxArgs[0]);
// Compute timestep (based on CFL condition)
callDtKernel(TIMETHREADS, dtArgs);
cudaMemcpy(dt_host, dt_device.getRawPtr(), sizeof(float), cudaMemcpyDeviceToHost); //, stream1);
// cudaEventRecord(dt_complete, stream1);
// Perform RK1 step
callRKKernel(gridBlockRK, threadBlockRK, 0, RKArgs[0]);
//Update boudries
callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[1]);
//RK2
// Compute flux
callFluxKernel(gridBlockFlux, threadBlockFlux, 1, fluxArgs[1]);
//Perform RK2 step
callRKKernel(gridBlockRK, threadBlockRK, 1, RKArgs[1]);
//cudaEventRecord(srteam_sync, srteam1);
callCollectiveSetBCPeriodic(gridBC, blockBC, BCArgs[2]);
//cudaEventSynchronize(dt_complete);
step++;
currentTime += *dt_host;
// printf("Step: %i, current time: %.6f dt:%.6f\n" , step,currentTime, dt_host[0]);
}
//cuProfilerStop();
//cudaProfilerStop();
printf("Elapsed time %.5f", get_wall_time() - timeStart);
rho_device.download(rho_dummy.get_ptr());
rho_dummy.printToFile(rho_file, true, false);
Config.exactSolution(E_dummy, currentTime);
E_dummy.printToFile(E_file, true, false);
printf("Step: %i, current time: %.6f dt:%.6f" , step,currentTime, dt_host[0]);
// Print test
printf("9 %s\n", cudaGetErrorString(cudaGetLastError()));
/*
cudaMemcpy(L_host, L_device, sizeof(float)*(nElements), cudaMemcpyDeviceToHost);
for (int i =0; i < nElements; i++)
printf(" %.7f ", L_host[i]);
*/
printf("%s\n", cudaGetErrorString(cudaGetLastError()));
return(0);
}