parallel prolongation
This commit is contained in:
parent
66f7080890
commit
8e5d22426f
@ -60,6 +60,7 @@ void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM)
|
||||
|
||||
//parallelized operations
|
||||
void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, float * lowerPhi0, int LOWERDIM);
|
||||
void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM);
|
||||
|
||||
/**
|
||||
* Relaxes an ODE matrix by 1 iteration of multigrid method
|
||||
@ -171,7 +172,7 @@ float solver_multigrid_iterate_parallel_recursive(float * phi, float * phi0, flo
|
||||
}
|
||||
|
||||
//interpolate from the lower grid
|
||||
prolongate_serial(phi,GRIDDIM,lowerPhi,LOWERDIM);
|
||||
prolongate_parallel(phi,GRIDDIM,lowerPhi,LOWERDIM);
|
||||
|
||||
//smooth
|
||||
solver_gauss_seidel_iterate_parallel(phi,phi0,a,c,GRIDDIM);
|
||||
@ -321,7 +322,7 @@ void restrict_parallel(float * currResidual, int GRIDDIM, float * lowerPhi, floa
|
||||
lowerPhi[solver_gauss_seidel_get_index(x,y,0,LOWERDIM)] = 0;
|
||||
for(z = 1; z < LOWERDIM-7; z=z+8){
|
||||
_mm256_storeu_ps(&lowerPhi[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],zeroVec);
|
||||
residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,2);
|
||||
residuals = _mm256_i32gather_ps(&currResidual[solver_gauss_seidel_get_index(x*2,y*2,z*2,GRIDDIM)],offsets,sizeof(float));
|
||||
_mm256_storeu_ps(&lowerPhi0[solver_gauss_seidel_get_index(x,y,z,LOWERDIM)],residuals);
|
||||
}
|
||||
lowerPhi[solver_gauss_seidel_get_index(x,y,LOWERDIM - 1,LOWERDIM)] = 0;
|
||||
@ -379,6 +380,31 @@ void prolongate_serial(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prolongates a lower grid into a higher grid
|
||||
*/
|
||||
void prolongate_parallel(float * phi, int GRIDDIM, float * lowerPhi, int LOWERDIM){
|
||||
__m256i offsets = _mm256_set_epi32(0, 0, 1, 1, 2, 2, 3, 3);
|
||||
__m256 lowerPhiVec;
|
||||
__m256 phiVec;
|
||||
int x, y, z;
|
||||
for(int x = 1; x < GRIDDIM - 1; x++){
|
||||
for(int y = 1; y < GRIDDIM - 1; y++){
|
||||
for(int z = 1; z < GRIDDIM - 1; z=z+8){
|
||||
phiVec = _mm256_loadu_ps(&phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)]);
|
||||
lowerPhiVec = _mm256_i32gather_ps(&lowerPhi[solver_gauss_seidel_get_index(x/2,y/2,z/2,LOWERDIM)],offsets,sizeof(float));
|
||||
_mm256_storeu_ps(
|
||||
&phi[solver_gauss_seidel_get_index(x,y,z,GRIDDIM)],
|
||||
_mm256_add_ps(
|
||||
phiVec,
|
||||
lowerPhiVec
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that all grids are allocated
|
||||
*/
|
||||
|
||||
Loading…
Reference in New Issue
Block a user