multigrid residual checking
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
This commit is contained in:
parent
4dafea8680
commit
02e7cd2374
@ -180,11 +180,14 @@ LIBRARY_API void fluid_grid2_solveProjection(
|
|||||||
//perform iteration of v cycle multigrid method
|
//perform iteration of v cycle multigrid method
|
||||||
float residual = 1;
|
float residual = 1;
|
||||||
int iteration = 0;
|
int iteration = 0;
|
||||||
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && residual > FLUID_GRID2_REALLY_SMALL_VALUE){
|
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_REALLY_SMALL_VALUE || residual < -FLUID_GRID2_REALLY_SMALL_VALUE)){
|
||||||
residual = solver_multigrid_iterate(p,div,a,c);
|
residual = solver_multigrid_iterate(p,div,a,c);
|
||||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p);
|
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p);
|
||||||
iteration++;
|
iteration++;
|
||||||
}
|
}
|
||||||
|
if(residual > FLUID_GRID2_REALLY_SMALL_VALUE || residual < -FLUID_GRID2_REALLY_SMALL_VALUE){
|
||||||
|
printf("Projection residual didn't converge! %f \n",residual);
|
||||||
|
}
|
||||||
|
|
||||||
// solver_conjugate_gradient_solve_serial(p,div,a,c);
|
// solver_conjugate_gradient_solve_serial(p,div,a,c);
|
||||||
|
|
||||||
|
|||||||
@ -29,6 +29,9 @@ static float * halfGridPhi0 = NULL;
|
|||||||
static float * quarterGridPhi = NULL;
|
static float * quarterGridPhi = NULL;
|
||||||
static float * quarterGridPhi0 = NULL;
|
static float * quarterGridPhi0 = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
float solver_multigrid_calculate_residual(float * phi, float * phi0, float a, float c);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
||||||
* @param phi The phi array
|
* @param phi The phi array
|
||||||
@ -150,43 +153,59 @@ float solver_multigrid_iterate(float * phi, float * phi0, float a, float c){
|
|||||||
//gauss-seidel at highest res
|
//gauss-seidel at highest res
|
||||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM);
|
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM);
|
||||||
|
|
||||||
|
|
||||||
|
return solver_multigrid_calculate_residual(phi,phi0,a,c);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the residual of the grid
|
||||||
|
*/
|
||||||
|
float solver_multigrid_calculate_residual(float * phi, float * phi0, float a, float c){
|
||||||
//calculate residual
|
//calculate residual
|
||||||
__m256 aScalar = _mm256_set1_ps(a);
|
__m256 aScalar = _mm256_set1_ps(a);
|
||||||
__m256 cScalar = _mm256_set1_ps(c);
|
__m256 cScalar = _mm256_set1_ps(c);
|
||||||
__m256 collector = _mm256_setzero_ps();
|
__m256 collector = _mm256_setzero_ps();
|
||||||
__m256 vector;
|
__m256 vector;
|
||||||
|
float vec_sum_storage[8];
|
||||||
float residual = 1;
|
float residual = 1;
|
||||||
//transform u direction
|
//transform u direction
|
||||||
// int i, j, k;
|
int i, j, k;
|
||||||
// for(k=1; k<DIM-2; k++){
|
for(k=1; k<DIM-1; k++){
|
||||||
// for(j=1; j<DIM-2; j++){
|
for(j=1; j<DIM-1; j++){
|
||||||
// //lower
|
//lower
|
||||||
// i = 1;
|
i = 1;
|
||||||
// vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
||||||
// vector = _mm256_mul_ps(vector,aScalar);
|
vector = _mm256_mul_ps(vector,aScalar);
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
||||||
// vector = _mm256_div_ps(vector,cScalar);
|
vector = _mm256_div_ps(vector,cScalar);
|
||||||
// collector = _mm256_add_ps(collector,vector);
|
collector = _mm256_add_ps(collector,vector);
|
||||||
|
|
||||||
// //upper
|
//upper
|
||||||
// i = 9;
|
i = 9;
|
||||||
// vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
||||||
// vector = _mm256_mul_ps(vector,aScalar);
|
vector = _mm256_mul_ps(vector,aScalar);
|
||||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
||||||
// vector = _mm256_div_ps(vector,cScalar);
|
vector = _mm256_div_ps(vector,cScalar);
|
||||||
// collector = _mm256_add_ps(collector,vector);
|
collector = _mm256_add_ps(collector,vector);
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// residual = vec_sum(collector);
|
_mm256_storeu_ps(vec_sum_storage,collector);
|
||||||
|
residual =
|
||||||
|
vec_sum_storage[0] + vec_sum_storage[1] +
|
||||||
|
vec_sum_storage[2] + vec_sum_storage[3] +
|
||||||
|
vec_sum_storage[4] + vec_sum_storage[5] +
|
||||||
|
vec_sum_storage[6] + vec_sum_storage[7]
|
||||||
|
;
|
||||||
return residual;
|
return residual;
|
||||||
}
|
}
|
||||||
@ -1,29 +1,20 @@
|
|||||||
#include "util/vector.h"
|
#include "util/vector.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used for summing vecs
|
||||||
|
*/
|
||||||
|
static float vec_sum_storage[8];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sums a float vector's elements
|
* Sums a float vector's elements
|
||||||
*/
|
*/
|
||||||
LIBRARY_API float vec_sum(__m256 x) {
|
LIBRARY_API float vec_sum(__m256 x) {
|
||||||
// hiQuad = ( x7, x6, x5, x4 )
|
_mm256_storeu_ps(vec_sum_storage,x);
|
||||||
const __m128 hiQuad = _mm256_extractf128_ps(x, 1);
|
return
|
||||||
// loQuad = ( x3, x2, x1, x0 )
|
vec_sum_storage[0] + vec_sum_storage[1] +
|
||||||
const __m128 loQuad = _mm256_castps256_ps128(x);
|
vec_sum_storage[2] + vec_sum_storage[3] +
|
||||||
// sumQuad = ( x3 + x7, x2 + x6, x1 + x5, x0 + x4 )
|
vec_sum_storage[4] + vec_sum_storage[5] +
|
||||||
const __m128 sumQuad = _mm_add_ps(loQuad, hiQuad);
|
vec_sum_storage[6] + vec_sum_storage[7]
|
||||||
// loDual = ( -, -, x1 + x5, x0 + x4 )
|
;
|
||||||
const __m128 loDual = sumQuad;
|
|
||||||
// hiDual = ( -, -, x3 + x7, x2 + x6 )
|
|
||||||
const __m128 hiDual = _mm_movehl_ps(sumQuad, sumQuad);
|
|
||||||
// sumDual = ( -, -, x1 + x3 + x5 + x7, x0 + x2 + x4 + x6 )
|
|
||||||
const __m128 sumDual = _mm_add_ps(loDual, hiDual);
|
|
||||||
// lo = ( -, -, -, x0 + x2 + x4 + x6 )
|
|
||||||
const __m128 lo = sumDual;
|
|
||||||
// hi = ( -, -, -, x1 + x3 + x5 + x7 )
|
|
||||||
const __m128 hi = _mm_shuffle_ps(sumDual, sumDual, 0x1);
|
|
||||||
// sum = ( -, -, -, x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 )
|
|
||||||
const __m128 sum = _mm_add_ss(lo, hi);
|
|
||||||
return _mm_cvtss_f32(sum);
|
|
||||||
}
|
}
|
||||||
@ -44,7 +44,7 @@ int fluid_sim_grid2_add_dens_test1(){
|
|||||||
|
|
||||||
//actually simulate
|
//actually simulate
|
||||||
int frameCount = 50;
|
int frameCount = 50;
|
||||||
int additionFrameCutoff = 10;
|
int additionFrameCutoff = 25;
|
||||||
for(int frame = 0; frame < frameCount; frame++){
|
for(int frame = 0; frame < frameCount; frame++){
|
||||||
if(frame < additionFrameCutoff){
|
if(frame < additionFrameCutoff){
|
||||||
queue[0]->d0[CENTER_LOC][IX(5,5,5)] = MAX_FLUID_VALUE;
|
queue[0]->d0[CENTER_LOC][IX(5,5,5)] = MAX_FLUID_VALUE;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user