/* */ /* CS 4331: Parallel Programming Fall, 2012 */ /* */ /* This MPI program computes a vector inner product in parallel. */ /* */ /* The computation is vip = sum(i=0; i<length){b0[i]*b1[i]} */ /* Each processor computes it local inner product and then MPI_Reduce */ /* is called to "MPI_SUM" all the "vip"s to one "result" in PROC_0. */ /* */ /* The "new" stuff that's used is: */ /* */ /* #define PROC_0 0 processor 0 */ /* #define B0_TYPE 176 message "types" */ /* #define B1_TYPE 177 */ /* MPI_Status stat; MPI structure for return codes */ /* */ /* MPI_Comm_size(MPI_COMM_WORLD, &numprocs); how many processors? */ /* MPI_Comm_rank(MPI_COMM_WORLD, &myid); which one am I? */ /* */ /* MPI_Send(b0, MAX_LEN, MPI_DOUBLE, p, B0_TYPE, MPI_COMM_WORLD); */ /* MPI_Send(b1, MAX_LEN, MPI_DOUBLE, p, B1_TYPE, MPI_COMM_WORLD); */ /* */ /* MPI_Recv(b0, MAX_LEN, MPI_DOUBLE, PROC_0, B0_TYPE, MPI_COMM_WORLD, &stat); */ /* MPI_Recv(b1, MAX_LEN, MPI_DOUBLE, PROC_0, B1_TYPE, MPI_COMM_WORLD, &stat); */ /* */ /* MPI_Barrier(MPI_COMM_WORLD); */ /* */ /* MPI_Reduce(&vip, &result, 1, MPI_DOUBLE, MPI_SUM, PROC_0, MPI_COMM_WORLD); */ /* */ #include <stdio.h> #include <stdlib.h> /* for the random number generator */ #include "mpi.h" #define MAX_LEN 1 << 18 /* maximum vector length */ #define PROC_0 0 /* processor 0 */ #define B0_TYPE 176 /* message "types" */ #define B1_TYPE 177 int main(argc,argv) int argc; char *argv[]; { int numprocs, p, /* number of processors, proc index */ myid, /* this processor's "rank" */ length, /* vector length */ i; double b0[MAX_LEN], b1[MAX_LEN], /* vectors */ vip, /* local vector inner product */ result; /* global " " " */ double start_time, end_time; /* "wallclock" times */ char proc_name[MPI_MAX_PROCESSOR_NAME]; int proc_name_len; MPI_Status stat; /* MPI structure containing return */ /* codes for message passing operations */ MPI_Init(&argc,&argv); /* initialize MPI */ MPI_Comm_size(MPI_COMM_WORLD, &numprocs); /*how many processors? */ MPI_Comm_rank(MPI_COMM_WORLD, &myid); /*which one am I? */ MPI_Get_processor_name(proc_name,&proc_name_len); /* what's my name? */ if (myid == PROC_0) { printf("There are %d processes.\n", numprocs); printf("Hello from processor 0: %s\n", proc_name); /* generate and distribute vectors for other processors */ srand48(0xFEEDFACE); for (p=1; p<numprocs; ++p) { for (i=0; i<MAX_LEN; ++i) { b0[i] = (double) drand48(); /* / (double) (1 << 30);*/ b1[i] = (double) drand48(); /* / (double) (1 << 30);*/ } MPI_Send(b0, MAX_LEN, MPI_DOUBLE, p, B0_TYPE, MPI_COMM_WORLD); MPI_Send(b1, MAX_LEN, MPI_DOUBLE, p, B1_TYPE, MPI_COMM_WORLD); } /* generate processor 0's vector */ for (i=0; i<MAX_LEN; ++i) { b0[i] = (double) drand48(); b1[i] = (double) drand48(); } } else // all other processors do this { printf("Hello from processor %s. My rank is %d.\n", proc_name, myid); /* receive vector from processor 0 */ MPI_Recv(b0, MAX_LEN, MPI_DOUBLE, PROC_0, B0_TYPE, MPI_COMM_WORLD, &stat); MPI_Recv(b1, MAX_LEN, MPI_DOUBLE, PROC_0, B1_TYPE, MPI_COMM_WORLD, &stat); } /* measure computational speed for vectors of various lengths */ for(length=MAX_LEN; length>=1024; length/=2) { /* synchronize the processors (roughly) on each pass */ MPI_Barrier(MPI_COMM_WORLD); /* start the clock in processor 0 */ if (myid == PROC_0) start_time = MPI_Wtime(); /* compute the local inner product */ vip = 0.0; for (i=0; i<length; ++i) vip += b0[i] * b1[i]; /* combine local inner products, result is in processor 0 only */ MPI_Reduce(&vip, &result, 1, MPI_DOUBLE, MPI_SUM, PROC_0, MPI_COMM_WORLD); /* stop the clock and print the time */ if (myid == PROC_0) { end_time = MPI_Wtime(); printf("Length = %d\tElapsed time=%lf\t %f\n", length*numprocs, end_time - start_time, result); } } MPI_Finalize(); }