parallel prolongation

This commit is contained in:
austin 2024-12-11 19:05:25 -05:00
parent 66f7080890
commit 8e5d22426f

View File

@ -60,6 +60,7 @@ void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM)
//parallelized operations
void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM);
void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM);
/**
* Relaxes an ODE matrix by 1 iteration of multigrid method
@ -171,7 +172,7 @@ float solver_multigrid_iterate_parallel_recursive(float * phi, float * phi0, flo
}
//interpolate from the lower grid
prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM);
prolongate_parallel(phi,GRIDDIM,lowerPhi,LOWERDIM);
//smooth
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
@ -321,7 +322,7 @@ void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, floa
lowerPhi[solver_gauss_seidel_get_index(x,y,0,LOWERDIM)] = 0;
for(z = 1; z < LOWERDIM-7; z=z+8){
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,2);
residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,sizeof(float));
_mm256_storeu_ps(&lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],residuals);
}
lowerPhi[solver_gauss_seidel_get_index(x,y,LOWERDIM - 1,LOWERDIM)] = 0;
@ -379,6 +380,31 @@ void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM)
}
}
/**
* Prolongates a lower grid into a higher grid
*/
void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM){
__m256i offsets = _mm256_set_epi32(0, 0, 1, 1, 2, 2, 3, 3);
__m256 lowerPhiVec;
__m256 phiVec;
int x, y, z;
for(int x = 1; x < GRIDDIM - 1; x++){
for(int y = 1; y < GRIDDIM - 1; y++){
for(int z = 1; z < GRIDDIM - 1; z=z+8){
phiVec = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)]);
lowerPhiVec = _mm256_i32gather_ps(&lowerPhi[solver_gauss_seidel_get_index(x/2,y/2,z/2,LOWERDIM)],offsets,sizeof(float));
_mm256_storeu_ps(
&phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)],
_mm256_add_ps(
phiVec,
lowerPhiVec
)
);
}
}
}
}
/**
* Verifies that all grids are allocated
*/