parallelize addition functions
Some checks failed
studiorailgun/Renderer/pipeline/head There was a failure building this commit

This commit is contained in:
austin 2024-12-12 11:03:38 -05:00
parent 4010b2beb7
commit 3d8d2cf52b
4 changed files with 68 additions and 10 deletions

View File

@ -5,7 +5,7 @@
/**
* The number of times to relax most solvers
*/
#define FLUID_GRID2_LINEARSOLVERTIMES 5
#define FLUID_GRID2_LINEARSOLVERTIMES 10
/**
* The number of times to relax most solvers

View File

@ -10,6 +10,7 @@
#include "fluid/sim/grid2/utilities.h"
#include "fluid/sim/grid2/velocity.h"
#include "math/ode/multigrid.h"
#include "math/ode/multigrid_parallel.h"
#include "math/ode/gauss_seidel.h"
@ -29,6 +30,7 @@ void fluid_grid2_addDensity(
__m256 maxVec = _mm256_set1_ps(MAX_FLUID_VALUE);
__m256 existing;
__m256 delta;
__m256 dtVec = _mm256_set1_ps(dt);
float * x = GET_ARR_RAW(d,CENTER_LOC);
float * s = GET_ARR_RAW(d0,CENTER_LOC);
for(i=0; i<size; i=i+8){
@ -39,20 +41,25 @@ void fluid_grid2_addDensity(
_mm256_min_ps(
_mm256_add_ps(
existing,
delta
_mm256_mul_ps(
delta,
dtVec
)
),
maxVec
),
minVec
)
);
// x[i] += dt*s[i];
// if(x[i] < MIN_FLUID_VALUE){
// x[i] = MIN_FLUID_VALUE;
// } else if(x[i] > MAX_FLUID_VALUE){
// x[i] = MAX_FLUID_VALUE;
// }
}
// for(i=0; i<size; i++){
// x[i] += dt*s[i];
// if(x[i] < MIN_FLUID_VALUE){
// x[i] = MIN_FLUID_VALUE;
// } else if(x[i] > MAX_FLUID_VALUE){
// x[i] = MAX_FLUID_VALUE;
// }
// }
}
/*
@ -68,6 +75,14 @@ LIBRARY_API void fluid_grid2_solveDiffuseDensity(
int i, j, k, l, m;
float * x = GET_ARR_RAW(d,CENTER_LOC);
float * x0 = GET_ARR_RAW(d0,CENTER_LOC);
// float residual = 1;
// int iterations = 0;
// while(iterations < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE || residual < -FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE)){
// residual = solver_multigrid_parallel_iterate(x,x0,a,c);
// fluid_grid2_set_bounds(FLUID_GRID2_BOUND_NO_DIR,x);
// iterations++;
// }
for(int l = 0; l < FLUID_GRID2_LINEARSOLVERTIMES; l++){
//iterate

View File

@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdint.h>
#include <immintrin.h>
#include "fluid/env/utilities.h"
#include "fluid/queue/chunkmask.h"
@ -19,9 +20,25 @@
/**
* Adds the source array into the target array scaled by dt, i.e. x[i] += dt*s[i],
* over DIM*DIM*DIM floats.
* @param x The array that is accumulated into (both read and written)
* @param s The array of values to add
* @param dt The factor every value of s is multiplied by before it is added
*/
void fluid_grid2_add_source(float * x, float * s, float dt){
int i;
int size=DIM*DIM*DIM;
for(i=0; i<size; i++){
x[i] += dt*s[i];
__m256 existing;
__m256 delta;
//dt copied into all 8 lanes so a whole vector of source values can be scaled at once
__m256 dtVec = _mm256_set1_ps(dt);
//NOTE(review): this loop advances 8 floats per iteration and has no scalar tail, so it
//assumes DIM*DIM*DIM is a multiple of 8 -- otherwise the last iteration loads and stores
//past the end of x and s. Confirm against the definition of DIM.
for(i=0; i<size; i=i+8){
//loadu/storeu are the unaligned variants, so x and s are not required to be 32-byte aligned
existing = _mm256_loadu_ps(&x[i]);
delta = _mm256_loadu_ps(&s[i]);
//x[i..i+7] = existing + delta * dt
_mm256_storeu_ps(&x[i],
_mm256_add_ps(
existing,
_mm256_mul_ps(
delta,
dtVec
)
)
);
}
// for(i=0; i<size; i++){
// x[i] += dt*s[i];
// }
}
/**

View File

@ -73,6 +73,32 @@ LIBRARY_API void fluid_grid2_solveVectorDiffuse (
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,v);
fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,w);
}
// float residual;
// int iterations;
// residual = 1;
// iterations = 0;
// while(iterations < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE || residual < -FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE)){
// residual = solver_multigrid_parallel_iterate(u,u0,a,c);
// fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_U,u);
// iterations++;
// }
// residual = 1;
// iterations = 0;
// while(iterations < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE || residual < -FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE)){
// residual = solver_multigrid_parallel_iterate(v,v0,a,c);
// fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_V,v);
// iterations++;
// }
// residual = 1;
// iterations = 0;
// while(iterations < FLUID_GRID2_LINEARSOLVERTIMES && (residual > FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE || residual < -FLUID_GRID2_SOLVER_MULTIGRID_TOLERANCE)){
// residual = solver_multigrid_parallel_iterate(w,w0,a,c);
// fluid_grid2_set_bounds(FLUID_GRID2_BOUND_DIR_W,w);
// iterations++;
// }
}
/**