Skip to content
Snippets Groups Projects
Commit 891b9d65 authored by Daniel Maier's avatar Daniel Maier
Browse files

rodinia benchmark added

parent 5c962a9c
No related branches found
No related tags found
No related merge requests found
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include <sys/time.h>
/* Returns the current system time in microseconds, as reported by
 * gettimeofday().
 *
 * The seconds field is widened to long long before it is scaled so the
 * multiplication cannot overflow on platforms where time_t is narrower
 * than 64 bits.
 */
long long get_time()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (static_cast<long long>(tv.tv_sec) * 1000000LL) + tv.tv_usec;
}
/* NOTE(review): nothing in this file appears to use std:: names unqualified;
 * this directive looks unnecessary - confirm before removing. */
using namespace std;
/* Edge length, in cells, of the tiles single_iteration() splits the grid into. */
#define BLOCK_SIZE 16
/* Tile extent along columns (C) and rows (R); both currently alias BLOCK_SIZE. */
#define BLOCK_SIZE_C BLOCK_SIZE
#define BLOCK_SIZE_R BLOCK_SIZE
/* Size of the line buffers used by read_input() and writeoutput(). */
#define STR_SIZE 256
/* maximum power density possible (say 300W for a 10mm x 10mm chip);
 * used by compute_tran_temp() to bound the temperature slope */
#define MAX_PD (3.0e6)
/* required precision in degrees; together with the maximum slope it
 * determines the time step in compute_tran_temp() */
#define PRECISION 0.001
/* Material constants used by compute_tran_temp(): SPEC_HEAT_SI enters the
 * cell capacitance, K_SI enters the Rx/Ry/Rz resistances. Presumably specific
 * heat and thermal conductivity in SI units - confirm. */
#define SPEC_HEAT_SI 1.75e6
#define K_SI 100
/* capacitance fitting factor */
#define FACTOR_CHIP 0.5
/* When defined, single_iteration() sets the OpenMP thread count and
 * parallelizes its tile loop with "omp parallel for". */
#define OPEN
//#define NUM_THREAD 4
/* Floating-point type used for all grid data and solver coefficients. */
typedef float FLOAT;
/* chip parameters: chip_height and chip_width are divided by the grid's row
 * and column counts in compute_tran_temp() to obtain the per-cell size;
 * t_chip enters the capacitance and resistance formulas there.
 * Units are presumably metres - confirm. */
const FLOAT t_chip = 0.0005;
const FLOAT chip_height = 0.016;
const FLOAT chip_width = 0.016;
/* When OMP_OFFLOAD is defined, declarations from here up to the matching
 * offload_attribute(pop) after single_iteration() are tagged for the "mic"
 * target. */
#ifdef OMP_OFFLOAD
#pragma offload_attribute(push, target(mic))
#endif
/* ambient temperature, assuming no package at all */
const FLOAT amb_temp = 80.0;
/* Thread count handed to omp_set_num_threads() in single_iteration();
 * main() fills it in from argv[4]. */
int num_omp_threads;
/* Single iteration of the transient solver in the grid model.
 * Advances the solution of the discretized difference equations
 * by one time step.
 *
 * The row x col grid is split into BLOCK_SIZE_R x BLOCK_SIZE_C tiles that are
 * numbered row-major (tile = tile_row * chunks_in_row + tile_col) and, when
 * OPEN is defined, distributed over OpenMP threads. Tiles touching the grid
 * boundary go through a per-cell case analysis (corners, edges, interior);
 * fully interior tiles use a branch-free loop.
 *
 * Parameters:
 *   result  - output grid (row*col cells, row-major); receives temp + delta
 *   temp    - input temperatures for this step (row*col cells); only read
 *   power   - per-cell power values (row*col cells); only read
 *   row,col - grid dimensions; the corner/edge formulas read a neighbour in
 *             each direction, so both must be at least 2
 *   Cap_1   - step / capacitance, as computed by compute_tran_temp()
 *   Rx_1, Ry_1, Rz_1 - reciprocal resistances along x, y and to ambient
 *   step    - unused here (already folded into Cap_1); kept so the
 *             signature stays compatible with existing callers
 *
 * Fixes relative to the previous version:
 *   - the tile's row index is derived from chunks_in_row (tiles are laid out
 *     row-major), which matters for non-square grids;
 *   - cells of a boundary tile that are not themselves on the grid boundary
 *     now get the regular interior update instead of a stale delta;
 *   - tile counts are rounded up and the loops stop at the clamped tile end,
 *     so grids whose size is not a multiple of the block size are fully
 *     covered without running past the grid.
 */
void single_iteration(FLOAT *result, FLOAT *temp, FLOAT *power, int row, int col,
                      FLOAT Cap_1, FLOAT Rx_1, FLOAT Ry_1, FLOAT Rz_1,
                      FLOAT step)
{
    (void)step; /* intentionally unused, see header comment */

    FLOAT delta;
    int r, c;
    int chunk;
    /* Round up so a trailing partial tile is still visited. */
    int chunks_in_row = (col + BLOCK_SIZE_C - 1) / BLOCK_SIZE_C;
    int chunks_in_col = (row + BLOCK_SIZE_R - 1) / BLOCK_SIZE_R;
    int num_chunk = chunks_in_row * chunks_in_col;
#ifdef OPEN
#ifndef __MIC__
    omp_set_num_threads(num_omp_threads);
#endif
    #pragma omp parallel for shared(power, temp, result) private(chunk, r, c, delta) firstprivate(row, col, num_chunk, chunks_in_row) schedule(static)
#endif
    for ( chunk = 0; chunk < num_chunk; ++chunk )
    {
        /* Row-major tile numbering: the divisor for both the tile row and
         * the tile column is the number of tiles per grid row. */
        int r_start = BLOCK_SIZE_R*(chunk/chunks_in_row);
        int c_start = BLOCK_SIZE_C*(chunk%chunks_in_row);
        int r_end = r_start + BLOCK_SIZE_R > row ? row : r_start + BLOCK_SIZE_R;
        int c_end = c_start + BLOCK_SIZE_C > col ? col : c_start + BLOCK_SIZE_C;

        if ( r_start == 0 || c_start == 0 || r_end == row || c_end == col )
        {
            /* Boundary tile: pick the stencil per cell. */
            for ( r = r_start; r < r_end; ++r ) {
                for ( c = c_start; c < c_end; ++c ) {
                    /* Corner 1 */
                    if ( (r == 0) && (c == 0) ) {
                        delta = (Cap_1) * (power[0] +
                            (temp[1] - temp[0]) * Rx_1 +
                            (temp[col] - temp[0]) * Ry_1 +
                            (amb_temp - temp[0]) * Rz_1);
                    }   /* Corner 2 */
                    else if ((r == 0) && (c == col-1)) {
                        delta = (Cap_1) * (power[c] +
                            (temp[c-1] - temp[c]) * Rx_1 +
                            (temp[c+col] - temp[c]) * Ry_1 +
                            (amb_temp - temp[c]) * Rz_1);
                    }   /* Corner 3 */
                    else if ((r == row-1) && (c == col-1)) {
                        delta = (Cap_1) * (power[r*col+c] +
                            (temp[r*col+c-1] - temp[r*col+c]) * Rx_1 +
                            (temp[(r-1)*col+c] - temp[r*col+c]) * Ry_1 +
                            (amb_temp - temp[r*col+c]) * Rz_1);
                    }   /* Corner 4 */
                    else if ((r == row-1) && (c == 0)) {
                        delta = (Cap_1) * (power[r*col] +
                            (temp[r*col+1] - temp[r*col]) * Rx_1 +
                            (temp[(r-1)*col] - temp[r*col]) * Ry_1 +
                            (amb_temp - temp[r*col]) * Rz_1);
                    }   /* Edge 1 */
                    else if (r == 0) {
                        delta = (Cap_1) * (power[c] +
                            (temp[c+1] + temp[c-1] - 2.0*temp[c]) * Rx_1 +
                            (temp[col+c] - temp[c]) * Ry_1 +
                            (amb_temp - temp[c]) * Rz_1);
                    }   /* Edge 2 */
                    else if (c == col-1) {
                        delta = (Cap_1) * (power[r*col+c] +
                            (temp[(r+1)*col+c] + temp[(r-1)*col+c] - 2.0*temp[r*col+c]) * Ry_1 +
                            (temp[r*col+c-1] - temp[r*col+c]) * Rx_1 +
                            (amb_temp - temp[r*col+c]) * Rz_1);
                    }   /* Edge 3 */
                    else if (r == row-1) {
                        delta = (Cap_1) * (power[r*col+c] +
                            (temp[r*col+c+1] + temp[r*col+c-1] - 2.0*temp[r*col+c]) * Rx_1 +
                            (temp[(r-1)*col+c] - temp[r*col+c]) * Ry_1 +
                            (amb_temp - temp[r*col+c]) * Rz_1);
                    }   /* Edge 4 */
                    else if (c == 0) {
                        delta = (Cap_1) * (power[r*col] +
                            (temp[(r+1)*col] + temp[(r-1)*col] - 2.0*temp[r*col]) * Ry_1 +
                            (temp[r*col+1] - temp[r*col]) * Rx_1 +
                            (amb_temp - temp[r*col]) * Rz_1);
                    }   /* Cell inside a boundary tile but not on the grid
                         * boundary: same stencil as the interior tiles. */
                    else {
                        delta = Cap_1 * (power[r*col+c] +
                            (temp[(r+1)*col+c] + temp[(r-1)*col+c] - 2.f*temp[r*col+c]) * Ry_1 +
                            (temp[r*col+c+1] + temp[r*col+c-1] - 2.f*temp[r*col+c]) * Rx_1 +
                            (amb_temp - temp[r*col+c]) * Rz_1);
                    }
                    result[r*col+c] = temp[r*col+c] + delta;
                }
            }
            continue;
        }

        /* Interior tile: every cell has all four neighbours. */
        for ( r = r_start; r < r_end; ++r ) {
            #pragma omp simd
            for ( c = c_start; c < c_end; ++c ) {
                /* Update Temperatures */
                result[r*col+c] = temp[r*col+c] +
                    ( Cap_1 * (power[r*col+c] +
                    (temp[(r+1)*col+c] + temp[(r-1)*col+c] - 2.f*temp[r*col+c]) * Ry_1 +
                    (temp[r*col+c+1] + temp[r*col+c-1] - 2.f*temp[r*col+c]) * Rx_1 +
                    (amb_temp - temp[r*col+c]) * Rz_1));
            }
        }
    }
}
#ifdef OMP_OFFLOAD
#pragma offload_attribute(pop)
#endif
/* Transient solver driver routine: simply converts the heat
 * transfer differential equations to difference equations
 * and solves the difference equations by iterating.
 *
 * Parameters:
 *   result         - scratch/output grid of row*col cells
 *   num_iterations - number of time steps to run
 *   temp           - initial temperatures (row*col cells); also used as a
 *                    working buffer
 *   power          - per-cell power values (row*col cells)
 *   row, col       - grid dimensions
 *
 * The two grids are ping-ponged: each step reads one buffer and writes the
 * other, then the roles swap. Consequently the final temperatures are in
 * `result` when num_iterations is odd and in `temp` when it is even.
 *
 * Fix relative to the previous version: with VERBOSE defined, the progress
 * message used `i++` on the loop counter, so the counter advanced twice per
 * pass and only half of the requested iterations ran. The message now prints
 * the counter without modifying it, and the shadowed outer counter (which
 * only ever printed 0 after the loop) is gone.
 */
void compute_tran_temp(FLOAT *result, int num_iterations, FLOAT *temp, FLOAT *power, int row, int col)
{
    /* Per-cell geometry and the derived RC model parameters. */
    FLOAT grid_height = chip_height / row;
    FLOAT grid_width = chip_width / col;
    FLOAT Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height;
    FLOAT Rx = grid_width / (2.0 * K_SI * t_chip * grid_height);
    FLOAT Ry = grid_height / (2.0 * K_SI * t_chip * grid_width);
    FLOAT Rz = t_chip / (K_SI * grid_height * grid_width);
    FLOAT max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI);
    FLOAT step = PRECISION / max_slope / 1000.0;
    /* Reciprocals are precomputed so the stencil only multiplies. */
    FLOAT Rx_1=1.f/Rx;
    FLOAT Ry_1=1.f/Ry;
    FLOAT Rz_1=1.f/Rz;
    FLOAT Cap_1 = step/Cap;
#ifdef VERBOSE
    fprintf(stdout, "total iterations: %d s\tstep size: %g s\n", num_iterations, step);
    fprintf(stdout, "Rx: %g\tRy: %g\tRz: %g\tCap: %g\n", Rx, Ry, Rz, Cap);
#endif
#ifdef OMP_OFFLOAD
    int array_size = row*col;
#pragma omp target \
    map(temp[0:array_size]) \
    map(to: power[0:array_size], row, col, Cap_1, Rx_1, Ry_1, Rz_1, step, num_iterations) \
    map( result[0:array_size])
#endif
    {
        FLOAT* r = result;
        FLOAT* t = temp;
        for (int i = 0; i < num_iterations ; i++)
        {
#ifdef VERBOSE
            /* Report only; the loop header owns the increment. */
            fprintf(stdout, "iteration %d\n", i);
#endif
            single_iteration(r, t, power, row, col, Cap_1, Rx_1, Ry_1, Rz_1, step);
            /* Swap roles: this step's output is the next step's input. */
            FLOAT* tmp = t;
            t = r;
            r = tmp;
        }
    }
}
/* Prints "error: <s>" to stderr and terminates the process with exit
 * status 1. Never returns.
 *
 * The message is taken as const char* so string literals can be passed
 * without an ill-formed literal-to-char* conversion; callers holding a
 * plain char* still work unchanged.
 */
void fatal(const char *s)
{
    fprintf(stderr, "error: %s\n", s);
    exit(1);
}
/* Writes a grid to a text file, one cell per line, formatted as
 * "<index>\t<value>\n" where index counts cells in row-major order
 * starting at 0.
 *
 * Parameters:
 *   vect      - grid data, grid_rows*grid_cols cells, row-major
 *   grid_rows - number of rows
 *   grid_cols - number of columns
 *   file      - path of the file to create/overwrite
 *
 * If the file cannot be opened, a message is printed to stdout and the
 * function returns without writing anything (previously it went on to use
 * the NULL stream).
 */
void writeoutput(FLOAT *vect, int grid_rows, int grid_cols, char *file) {
    FILE *fp = fopen(file, "w");
    if (fp == NULL) {
        printf( "The file was not opened\n" );
        return;
    }

    int index = 0;
    for (int i = 0; i < grid_rows; i++) {
        for (int j = 0; j < grid_cols; j++) {
            fprintf(fp, "%d\t%g\n", index, vect[i*grid_cols+j]);
            index++;
        }
    }
    fclose(fp);
}
/* Reads grid_rows*grid_cols floating-point values from a text file, one
 * value per line, into vect (row-major order).
 *
 * Parameters:
 *   vect      - destination array with room for grid_rows*grid_cols values
 *   grid_rows - number of rows
 *   grid_cols - number of columns
 *   file      - path of the input file
 *
 * Terminates the program through fatal() when the file cannot be opened,
 * when it holds fewer lines than cells, or when a line does not start with
 * a number.
 *
 * The end-of-input test is based on the return value of fgets() rather than
 * on feof() after the call: feof() is already set once the last line has
 * been read if that line has no trailing newline, which used to reject
 * otherwise complete files, and it stays clear on a read error.
 */
void read_input(FLOAT *vect, int grid_rows, int grid_cols, char *file)
{
    FILE *fp = fopen (file, "r");
    if (!fp)
        fatal ("file could not be opened for reading");

    char str[STR_SIZE];
    const int total = grid_rows * grid_cols;
    for (int i = 0; i < total; i++) {
        FLOAT val;
        if (fgets(str, STR_SIZE, fp) == NULL)
            fatal("not enough lines in file");
        if ((sscanf(str, "%f", &val) != 1) )
            fatal("invalid file format");
        vect[i] = val;
    }
    fclose(fp);
}
/* Prints the command-line synopsis to stderr and exits with status 1.
 *
 * Parameters:
 *   argc - unused; kept for the existing call sites
 *   argv - argv[0] is printed as the program name
 *
 * The synopsis line now lists <output_file> (main() requires it as the
 * seventh argument) and separates <no. of threads> from <temp_file>.
 */
void usage(int argc, char **argv)
{
    (void)argc;
    fprintf(stderr, "Usage: %s <grid_rows> <grid_cols> <sim_time> <no. of threads> <temp_file> <power_file> <output_file>\n", argv[0]);
    fprintf(stderr, "\t<grid_rows> - number of rows in the grid (positive integer)\n");
    fprintf(stderr, "\t<grid_cols> - number of columns in the grid (positive integer)\n");
    fprintf(stderr, "\t<sim_time> - number of iterations\n");
    fprintf(stderr, "\t<no. of threads> - number of threads\n");
    fprintf(stderr, "\t<temp_file> - name of the file containing the initial temperature values of each cell\n");
    fprintf(stderr, "\t<power_file> - name of the file containing the dissipated power values of each cell\n");
    fprintf(stderr, "\t<output_file> - name of the output file\n");
    exit(1);
}
int main(int argc, char **argv)
{
int grid_rows, grid_cols, sim_time, i;
FLOAT *temp, *power, *result;
char *tfile, *pfile, *ofile;
/* check validity of inputs */
if (argc != 8)
usage(argc, argv);
if ((grid_rows = atoi(argv[1])) <= 0 ||
(grid_cols = atoi(argv[2])) <= 0 ||
(sim_time = atoi(argv[3])) <= 0 ||
(num_omp_threads = atoi(argv[4])) <= 0
)
usage(argc, argv);
/* allocate memory for the temperature and power arrays */
temp = (FLOAT *) calloc (grid_rows * grid_cols, sizeof(FLOAT));
power = (FLOAT *) calloc (grid_rows * grid_cols, sizeof(FLOAT));
result = (FLOAT *) calloc (grid_rows * grid_cols, sizeof(FLOAT));
if(!temp || !power)
fatal("unable to allocate memory");
/* read initial temperatures and input power */
tfile = argv[5];
pfile = argv[6];
ofile = argv[7];
read_input(temp, grid_rows, grid_cols, tfile);
read_input(power, grid_rows, grid_cols, pfile);
printf("Start computing the transient temperature\n");
long long start_time = get_time();
compute_tran_temp(result,sim_time, temp, power, grid_rows, grid_cols);
long long end_time = get_time();
printf("Ending simulation\n");
printf("Total time: %.3f seconds\n", ((float) (end_time - start_time)) / (1000*1000));
writeoutput((1&sim_time) ? result : temp, grid_rows, grid_cols, ofile);
/* output results */
#ifdef VERBOSE
fprintf(stdout, "Final Temperatures:\n");
#endif
#ifdef OUTPUT
for(i=0; i < grid_rows * grid_cols; i++)
fprintf(stdout, "%d\t%g\n", i, temp[i]);
#endif
/* cleanup */
free(temp);
free(power);
return 0;
}
/* vim: set ts=4 sw=4 sts=4 et si ai: */
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment