multigrid residual checking
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit

This commit is contained in:
austin 2024-12-11 13:20:03 -05:00
parent 4dafea8680
commit 02e7cd2374
4 changed files with 66 additions and 53 deletions

View File

@ -180,11 +180,14 @@ LIBRARY_API void fluid_grid2_solveProjection(
//perform iteration of v cycle multigrid method
float residual = 1;
int iteration = 0;
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && residual > FLUID_GRID2_REALLY_SMALL_VALUE){
while(iteration < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_REALLY_SMALL_VALUE || residual < -FLUID_GRID2_REALLY_SMALL_VALUE)){
residual = solver_multigrid_iterate(p,div,a,c);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,p);
iteration++;
}
if(residual > FLUID_GRID2_REALLY_SMALL_VALUE || residual < -FLUID_GRID2_REALLY_SMALL_VALUE){
printf("Projection residual didn't converge! %f \n",residual);
}
// solver_conjugate_gradient_solve_serial(p,div,a,c);

View File

@ -29,6 +29,9 @@ static float * halfGridPhi0 = NULL;
static float * quarterGridPhi = NULL;
static float * quarterGridPhi0 = NULL;
float solver_multigrid_calculate_residual(float * phi, float * phi0, float a, float c);
/**
* Relaxes an ODE matrix by 1 iteration of multigrid method
* @param phi The phi array
@ -150,43 +153,59 @@ float solver_multigrid_iterate(float * phi, float * phi0, float a, float c){
//gauss-seidel at highest res
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,DIM);
return solver_multigrid_calculate_residual(phi,phi0,a,c);
}
/**
* Calculates the residual of the grid
*/
float solver_multigrid_calculate_residual(float * phi, float * phi0, float a, float c){
//calculate residual
__m256 aScalar = _mm256_set1_ps(a);
__m256 cScalar = _mm256_set1_ps(c);
__m256 collector = _mm256_setzero_ps();
__m256 vector;
float vec_sum_storage[8];
float residual = 1;
//transform u direction
// int i, j, k;
// for(k=1; k<DIM-2; k++){
// for(j=1; j<DIM-2; j++){
// //lower
// i = 1;
// vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
// vector = _mm256_mul_ps(vector,aScalar);
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
// vector = _mm256_div_ps(vector,cScalar);
// collector = _mm256_add_ps(collector,vector);
int i, j, k;
for(k=1; k<DIM-1; k++){
for(j=1; j<DIM-1; j++){
//lower
i = 1;
vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
vector = _mm256_div_ps(vector,cScalar);
collector = _mm256_add_ps(collector,vector);
// //upper
// i = 9;
// vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
// vector = _mm256_mul_ps(vector,aScalar);
// vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
// vector = _mm256_div_ps(vector,cScalar);
// collector = _mm256_add_ps(collector,vector);
// }
// }
// residual = vec_sum(collector);
//upper
i = 9;
vector = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i-1,j,k,DIM)]);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i+1,j,k,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j-1,k,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j+1,k,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k-1,DIM)]));
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(i,j,k+1,DIM)]));
vector = _mm256_mul_ps(vector,aScalar);
vector = _mm256_add_ps(vector,_mm256_loadu_ps(&phi0[solver_gauss_seidel_get_index(i,j,k,DIM)]));
vector = _mm256_div_ps(vector,cScalar);
collector = _mm256_add_ps(collector,vector);
}
}
_mm256_storeu_ps(vec_sum_storage,collector);
residual =
vec_sum_storage[0] + vec_sum_storage[1] +
vec_sum_storage[2] + vec_sum_storage[3] +
vec_sum_storage[4] + vec_sum_storage[5] +
vec_sum_storage[6] + vec_sum_storage[7]
;
return residual;
}

View File

@ -1,29 +1,20 @@
#include "util/vector.h"
/**
* Used for summing vecs
*/
static float vec_sum_storage[8];
/**
* Sums a float vector's elements
*/
LIBRARY_API float vec_sum(__m256 x) {
// hiQuad = ( x7, x6, x5, x4 )
const __m128 hiQuad = _mm256_extractf128_ps(x, 1);
// loQuad = ( x3, x2, x1, x0 )
const __m128 loQuad = _mm256_castps256_ps128(x);
// sumQuad = ( x3 + x7, x2 + x6, x1 + x5, x0 + x4 )
const __m128 sumQuad = _mm_add_ps(loQuad, hiQuad);
// loDual = ( -, -, x1 + x5, x0 + x4 )
const __m128 loDual = sumQuad;
// hiDual = ( -, -, x3 + x7, x2 + x6 )
const __m128 hiDual = _mm_movehl_ps(sumQuad, sumQuad);
// sumDual = ( -, -, x1 + x3 + x5 + x7, x0 + x2 + x4 + x6 )
const __m128 sumDual = _mm_add_ps(loDual, hiDual);
// lo = ( -, -, -, x0 + x2 + x4 + x6 )
const __m128 lo = sumDual;
// hi = ( -, -, -, x1 + x3 + x5 + x7 )
const __m128 hi = _mm_shuffle_ps(sumDual, sumDual, 0x1);
// sum = ( -, -, -, x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 )
const __m128 sum = _mm_add_ss(lo, hi);
return _mm_cvtss_f32(sum);
_mm256_storeu_ps(vec_sum_storage,x);
return
vec_sum_storage[0] + vec_sum_storage[1] +
vec_sum_storage[2] + vec_sum_storage[3] +
vec_sum_storage[4] + vec_sum_storage[5] +
vec_sum_storage[6] + vec_sum_storage[7]
;
}

View File

@ -44,7 +44,7 @@ int fluid_sim_grid2_add_dens_test1(){
//actually simulate
int frameCount = 50;
int additionFrameCutoff = 10;
int additionFrameCutoff = 25;
for(int frame = 0; frame < frameCount; frame++){
if(frame < additionFrameCutoff){
queue[0]->d0[CENTER_LOC][IX(5,5,5)] = MAX_FLUID_VALUE;