multigrid residual checking
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit
This commit is contained in:
parent
4dafea8680
commit
02e7cd2374
@ -180,11 +180,14 @@ LIBRARY_API void fluid_grid2_solveProjection(
|
||||
//perform iteration of v cycle multigrid method
|
||||
float residual = 1;
|
||||
int iteration = 0;
|
||||
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && residual > FLUID_GRID2_REALLY_SMALL_VALUE){
|
||||
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_REALLY_SMALL_VALUE || residual < -FLUID_GRID2_REALLY_SMALL_VALUE)){
|
||||
residual = solver_multigrid_iterate(p,div,a,c);
|
||||
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p);
|
||||
iteration++;
|
||||
}
|
||||
if(residual > FLUID_GRID2_REALLY_SMALL_VALUE || residual < -FLUID_GRID2_REALLY_SMALL_VALUE){
|
||||
printf("Projection residual didn't converge! %f \n",residual);
|
||||
}
|
||||
|
||||
// solver_conjugate_gradient_solve_serial(p,div,a,c);
|
||||
|
||||
|
||||
@ -29,6 +29,9 @@ static float * halfGridPhi0 = NULL;
|
||||
static float * quarterGridPhi = NULL;
|
||||
static float * quarterGridPhi0 = NULL;
|
||||
|
||||
|
||||
float solver_multigrid_calculate_residual(float * phi, float * phi0, float a, float c);
|
||||
|
||||
/**
|
||||
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
||||
* @param phi The phi array
|
||||
@ -150,43 +153,59 @@ float solver_multigrid_iterate(float * phi, float * phi0, float a, float c){
|
||||
//gauss-seidel at highest res
|
||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM);
|
||||
|
||||
|
||||
return solver_multigrid_calculate_residual(phi,phi0,a,c);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the residual of the grid
|
||||
*/
|
||||
float solver_multigrid_calculate_residual(float * phi, float * phi0, float a, float c){
|
||||
//calculate residual
|
||||
__m256 aScalar = _mm256_set1_ps(a);
|
||||
__m256 cScalar = _mm256_set1_ps(c);
|
||||
__m256 collector = _mm256_setzero_ps();
|
||||
__m256 vector;
|
||||
float vec_sum_storage[8];
|
||||
float residual = 1;
|
||||
//transform u direction
|
||||
// int i, j, k;
|
||||
// for(k=1; k<DIM-2; k++){
|
||||
// for(j=1; j<DIM-2; j++){
|
||||
// //lower
|
||||
// i = 1;
|
||||
// vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
||||
// vector = _mm256_mul_ps(vector,aScalar);
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
||||
// vector = _mm256_div_ps(vector,cScalar);
|
||||
// collector = _mm256_add_ps(collector,vector);
|
||||
int i, j, k;
|
||||
for(k=1; k<DIM-1; k++){
|
||||
for(j=1; j<DIM-1; j++){
|
||||
//lower
|
||||
i = 1;
|
||||
vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
||||
vector = _mm256_mul_ps(vector,aScalar);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
||||
vector = _mm256_div_ps(vector,cScalar);
|
||||
collector = _mm256_add_ps(collector,vector);
|
||||
|
||||
// //upper
|
||||
// i = 9;
|
||||
// vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
||||
// vector = _mm256_mul_ps(vector,aScalar);
|
||||
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
||||
// vector = _mm256_div_ps(vector,cScalar);
|
||||
// collector = _mm256_add_ps(collector,vector);
|
||||
// }
|
||||
// }
|
||||
// residual = vec_sum(collector);
|
||||
//upper
|
||||
i = 9;
|
||||
vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
|
||||
vector = _mm256_mul_ps(vector,aScalar);
|
||||
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
|
||||
vector = _mm256_div_ps(vector,cScalar);
|
||||
collector = _mm256_add_ps(collector,vector);
|
||||
}
|
||||
}
|
||||
_mm256_storeu_ps(vec_sum_storage,collector);
|
||||
residual =
|
||||
vec_sum_storage[0] + vec_sum_storage[1] +
|
||||
vec_sum_storage[2] + vec_sum_storage[3] +
|
||||
vec_sum_storage[4] + vec_sum_storage[5] +
|
||||
vec_sum_storage[6] + vec_sum_storage[7]
|
||||
;
|
||||
return residual;
|
||||
}
|
||||
@ -1,29 +1,20 @@
|
||||
#include "util/vector.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Used for summing vecs
|
||||
*/
|
||||
static float vec_sum_storage[8];
|
||||
|
||||
/**
|
||||
* Sums a float vector's elements
|
||||
*/
|
||||
LIBRARY_API float vec_sum(__m256 x) {
|
||||
// hiQuad = ( x7, x6, x5, x4 )
|
||||
const __m128 hiQuad = _mm256_extractf128_ps(x, 1);
|
||||
// loQuad = ( x3, x2, x1, x0 )
|
||||
const __m128 loQuad = _mm256_castps256_ps128(x);
|
||||
// sumQuad = ( x3 + x7, x2 + x6, x1 + x5, x0 + x4 )
|
||||
const __m128 sumQuad = _mm_add_ps(loQuad, hiQuad);
|
||||
// loDual = ( -, -, x1 + x5, x0 + x4 )
|
||||
const __m128 loDual = sumQuad;
|
||||
// hiDual = ( -, -, x3 + x7, x2 + x6 )
|
||||
const __m128 hiDual = _mm_movehl_ps(sumQuad, sumQuad);
|
||||
// sumDual = ( -, -, x1 + x3 + x5 + x7, x0 + x2 + x4 + x6 )
|
||||
const __m128 sumDual = _mm_add_ps(loDual, hiDual);
|
||||
// lo = ( -, -, -, x0 + x2 + x4 + x6 )
|
||||
const __m128 lo = sumDual;
|
||||
// hi = ( -, -, -, x1 + x3 + x5 + x7 )
|
||||
const __m128 hi = _mm_shuffle_ps(sumDual, sumDual, 0x1);
|
||||
// sum = ( -, -, -, x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 )
|
||||
const __m128 sum = _mm_add_ss(lo, hi);
|
||||
return _mm_cvtss_f32(sum);
|
||||
_mm256_storeu_ps(vec_sum_storage,x);
|
||||
return
|
||||
vec_sum_storage[0] + vec_sum_storage[1] +
|
||||
vec_sum_storage[2] + vec_sum_storage[3] +
|
||||
vec_sum_storage[4] + vec_sum_storage[5] +
|
||||
vec_sum_storage[6] + vec_sum_storage[7]
|
||||
;
|
||||
}
|
||||
@ -44,7 +44,7 @@ int fluid_sim_grid2_add_dens_test1(){
|
||||
|
||||
//actually simulate
|
||||
int frameCount = 50;
|
||||
int additionFrameCutoff = 10;
|
||||
int additionFrameCutoff = 25;
|
||||
for(int frame = 0; frame < frameCount; frame++){
|
||||
if(frame < additionFrameCutoff){
|
||||
queue[0]->d0[CENTER_LOC][IX(5,5,5)] = MAX_FLUID_VALUE;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user