* Log in with your account: ym**
{{{
login as: ym000
ym000@140.129.162.12's password: ******
ym000@bio001:~$ ssh-keygen -t rsa
}}}
* Generate the SSH authentication key pair
{{{
Generating public/private rsa key pair.
Enter file in which to save the key (/home/ym000/.ssh/id_rsa):
Created directory '/home/ym000/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /home/ym000/.ssh/id_rsa.
Your public key has been saved in /home/ym000/.ssh/id_rsa.pub.
The key fingerprint is:
2a:6c:05:f8:24:38:db:79:b9:4f:0c:74:da:c5:16:05 ym000@bio001
}}}
* Exchange the key so that key-based login works
{{{
ym000@bio001:~$ cp .ssh/id_rsa.pub .ssh/authorized_keys
}}}
* [Note] '''Because this is a DRBL environment, every machine already shares the same .ssh/authorized_keys.''' If you are not in a DRBL environment, you must copy .ssh/id_rsa.pub to .ssh/authorized_keys on every Compute Node yourself:
{{{
ym000@bio001:~$ for ((i=2;i<=7;i++)); do scp .ssh/id_rsa.pub ym000@192.168.129.$i:.ssh/authorized_keys ; done
}}}
* Set up the MPD configuration file and the path to the MPI executables
{{{
ym000@bio001:~$ echo "MPD_SECRETWORD=${USER}$$" > ~/.mpd.conf
ym000@bio001:~$ chmod 600 ~/.mpd.conf
ym000@bio001:~$ for ((i=2;i<=7;i++)); do echo "192.168.129.$i" >> mpd.hosts; done
ym000@bio001:~$ export PATH=$PATH:/opt/mpich2/bin
ym000@bio001:~$ which mpdboot
/opt/mpich2/bin/mpdboot
}}}
* Set up dsh (distributed shell)[http://packages.debian.org/stable/net/dsh *], which lets us run a command on every node with a single invocation. The first run also answers the SSH host-key prompts; once the keys are in known_hosts, the second run is clean.
{{{
ym000@bio001:~$ mkdir -p .dsh/
ym000@bio001:~$ cp mpd.hosts .dsh/machines.list
ym000@bio001:~$ dsh -a hostname
bio001002
bio001003
The authenticity of host '192.168.129.4 (192.168.129.4)' can't be established.
RSA key fingerprint is f0:4b:6f:52:3c:0b:f4:8b:1c:a0:33:4a:e2:15:e0:5a.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '192.168.129.4' (RSA) to the list of known hosts.
bio001004
The authenticity of host '192.168.129.5 (192.168.129.5)' can't be established.
RSA key fingerprint is 09:9b:25:5e:9c:a8:9a:dd:35:ee:f0:54:6a:11:b7:90.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '192.168.129.5' (RSA) to the list of known hosts.
bio001005
The authenticity of host '192.168.129.6 (192.168.129.6)' can't be established.
RSA key fingerprint is 40:10:50:38:2c:f0:0b:f7:11:85:a3:41:d9:fb:ac:7d.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '192.168.129.6' (RSA) to the list of known hosts.
bio001006
The authenticity of host '192.168.129.7 (192.168.129.7)' can't be established.
RSA key fingerprint is 07:95:b7:f8:a5:9c:c7:21:84:d0:5b:f4:5f:db:0b:a6.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added '192.168.129.7' (RSA) to the list of known hosts.
bio001007
ym000@bio001:~$ dsh -a hostname
bio001002
bio001003
bio001004
bio001005
bio001006
bio001007
}}}
* Start mpd as an ordinary user
{{{
ym000@bio001:~$ mpdboot -n 7
}}}
* Check the state of the mpd ring with mpdtrace
{{{
ym000@bio001:~$ mpdtrace
bio001
bio001005
bio001004
bio001003
bio001002
bio001007
bio001006
}}}
* Benchmark mpd message passing with mpdringtest
{{{
ym000@bio001:~$ mpdringtest 1000
time for 1000 loops = 0.648007154465 seconds
}}}
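* mpdringtest times a message going around the mpd daemon ring, not MPI traffic itself. As a rough MPI-level counterpart, here is a minimal sketch of the same idea (our own illustration, not part of the original lab): a token is passed around all ranks with MPI_Send/MPI_Recv and the loop is timed with MPI_Wtime. Compile it with mpicc like the other examples and run it with at least 2 processes.
{{{
/* ring.c -- hypothetical MPI-level ring timing sketch (not from the lab). */
#include <stdio.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int rank, size, token = 0, loops = 1000, i;
    double t0, t1;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    t0 = MPI_Wtime();
    for (i = 0; i < loops; i++) {
        if (rank == 0) {
            /* rank 0 starts the token and waits for it to come back */
            MPI_Send(&token, 1, MPI_INT, 1 % size, 0, MPI_COMM_WORLD);
            MPI_Recv(&token, 1, MPI_INT, size - 1, 0, MPI_COMM_WORLD, &status);
        } else {
            /* everyone else receives from the left, forwards to the right */
            MPI_Recv(&token, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD, &status);
            MPI_Send(&token, 1, MPI_INT, (rank + 1) % size, 0, MPI_COMM_WORLD);
        }
    }
    t1 = MPI_Wtime();

    if (rank == 0)
        printf("time for %d loops = %f seconds\n", loops, t1 - t0);
    MPI_Finalize();
    return 0;
}
}}}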
* Run the cpi example program with mpiexec
{{{
ym000@bio001:~$ mpiexec -n 3 /opt/mpich2/share/mpich2/examples/cpi
Process 0 of 1 is on bio001
pi is approximately 3.1415926544231341, Error is 0.0000000008333410
wall clock time = 0.000284
Process 0 of 1 is on bio001
pi is approximately 3.1415926544231341, Error is 0.0000000008333410
wall clock time = 0.000295
Process 0 of 1 is on bio001
pi is approximately 3.1415926544231341, Error is 0.0000000008333410
wall clock time = 0.000294
}}}
* Paste in test1.c
{{{
ym000@bio001:~$ cat << EOF > test1.c
> #include <stdio.h>
> #include "mpi.h"
> int main(int argc, char **argv)
> {
>     int len;
>     char name[MPI_MAX_PROCESSOR_NAME];
>     MPI_Init(&argc, &argv);
>     int myid, numprocs;
>
>     /* get the total number of nodes */
>     MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
>     /* get this node's id / rank */
>     MPI_Comm_rank(MPI_COMM_WORLD, &myid);
>     /* get this node's host name */
>     MPI_Get_processor_name(name, &len);
>     printf("This is machine %d of %d name = %s\n", myid, numprocs, name);
>
>     MPI_Finalize();
>     return 0;
> }
> EOF
}}}
* Compile test1.c with mpicc
{{{
ym000@bio001:~$ mpicc -o test1 test1.c
}}}
* Run the test1 program with mpiexec
{{{
ym000@bio001:~$ mpiexec -n 1 ./test1
This is machine 0 of 1 name = bio001
ym000@bio001:~$ mpiexec -n 12 ./test1
This is machine 0 of 12 name = bio001
This is machine 1 of 12 name = bio001004
This is machine 2 of 12 name = bio001005
This is machine 3 of 12 name = bio001003
This is machine 4 of 12 name = bio001002
This is machine 5 of 12 name = bio001007
This is machine 6 of 12 name = bio001006
This is machine 7 of 12 name = bio001
This is machine 8 of 12 name = bio001004
This is machine 11 of 12 name = bio001002
This is machine 9 of 12 name = bio001005
This is machine 10 of 12 name = bio001003
}}}
* Here is test1.c
{{{
#include <stdio.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int len;
    char name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(&argc, &argv);
    int myid, numprocs;

    /* get the total number of nodes */
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    /* get this node's id / rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    /* get this node's host name */
    MPI_Get_processor_name(name, &len);
    printf("This is machine %d of %d name = %s\n", myid, numprocs, name);

    MPI_Finalize();
    return 0;
}
}}}
* Here is test2.c, which relays a value from node 0 down the line of nodes to the last one
{{{
#include <stdio.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int n, myrank, numprocs;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    /* node 0 will send the first message */
    if (myrank == 0) {
        n = myrank;
        MPI_Send(&n, 1, MPI_INT, 1, 99, MPI_COMM_WORLD);
        printf("[Node %d]「%d」 >> [Node %d]\n\n", myrank, n, myrank+1);
    }
    /* node 1 to node n-2 will receive a message, then send one to the next node */
    if (myrank > 0 && myrank < numprocs-1) {
        MPI_Recv(&n, 1, MPI_INT, myrank-1, 99, MPI_COMM_WORLD, &status);
        printf("[Node %d] << 「%d」[Node %d]\n", myrank, n, status.MPI_SOURCE);
        n = myrank;
        MPI_Send(&n, 1, MPI_INT, myrank+1, 99, MPI_COMM_WORLD);
        printf("[Node %d]「%d」 >> [Node %d]\n\n", myrank, n, myrank+1);
    }
    /* the final node n-1 will not send any message, only receive */
    if (myrank == numprocs-1) {
        MPI_Recv(&n, 1, MPI_INT, myrank-1, 99, MPI_COMM_WORLD, &status);
        printf("[Node %d] << 「%d」[Node %d]\n", myrank, n, status.MPI_SOURCE);
    }
    MPI_Finalize();
    return 0;
}
}}}
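* In test2 every transfer is a matched blocking MPI_Send/MPI_Recv pair along an open line, so the relay is inherently sequential and safe. On a closed ring, where every rank sends and receives at the same time, paired blocking calls can deadlock; MPI_Sendrecv performs both sides in a single call. Here is a minimal sketch of such a ring shift (our own illustration, not part of the original lab):
{{{
/* shift.c -- hypothetical ring-shift sketch (not from the lab): every rank
 * sends its rank one step to the right and receives from the left in a
 * single MPI_Sendrecv call. */
#include <stdio.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int myrank, numprocs, left, right, sendval, recvval;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    right = (myrank + 1) % numprocs;            /* neighbour we send to      */
    left  = (myrank + numprocs - 1) % numprocs; /* neighbour we receive from */
    sendval = myrank;

    /* send to the right and receive from the left without risking deadlock */
    MPI_Sendrecv(&sendval, 1, MPI_INT, right, 99,
                 &recvval, 1, MPI_INT, left,  99,
                 MPI_COMM_WORLD, &status);

    printf("[Node %d] << 「%d」[Node %d]\n", myrank, recvval, left);
    MPI_Finalize();
    return 0;
}
}}}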
* Here is test3.c
{{{
/* Program:
 *   Every node sends a message to node 0, and node 0 prints them all.
 * History:
 *   2008-06-12 BETA
 *   2008-06-17 changed the output format and added comments
 */
#include <stdio.h>
#include <string.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int myrank, i, numprocs;
    char message[20];
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    /* Node 0 will do the following */
    if (myrank == 0) {
        /* receive messages from the other nodes */
        for (i = 1; i < numprocs; i++) {
            MPI_Recv(message, 20, MPI_CHAR, i, 99, MPI_COMM_WORLD, &status);
            printf("[Node 0] << 「%s」[Node %d] \n", message, status.MPI_SOURCE);
        }
    }
    /* the other nodes will do the following */
    if (myrank != 0) {
        /* send this node's rank to Node 0 */
        sprintf(message, "[%d]", myrank);
        MPI_Send(message, 20, MPI_CHAR, 0, 99, MPI_COMM_WORLD);
        printf("[Node %d]「%s」 >> [Node 0]\n", myrank, message);
    }
    MPI_Finalize();
    return 0;
}
}}}
* Here is test4.c
{{{
/* Program:
 *   The cpi example bundled with mpich, computing pi.
 * History:
 *   2008-04-11 BETA
 *   2008-06-19 allow the desired precision to be entered repeatedly
 *   2008-06-23 added MPI_Barrier so that every node receives n before computing
 */
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

double f(double);

double f(double a)
{
    return (4.0 / (1.0 + a*a));
}

int main(int argc, char *argv[])
{
    int done = 0, n, myid, numprocs, i = 0;
    double PI25DT = 3.141592653589793238462643;
    double mypi, pi, h, sum, x;
    double startwtime = 0.0, endwtime;
    int namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Get_processor_name(processor_name, &namelen);

    fprintf(stderr, "Process %d on %s\n", myid, processor_name);

    n = 0;
    while (!done) {
        /* node 0 reads the value entered by the user and sends it to the
           other nodes */
        if (myid == 0) {
            printf("Enter the number of intervals: (0 quits) ");
            scanf("%d", &n);
            startwtime = MPI_Wtime();
        }
        /* this is very important: every node must synchronise here before it
           can receive the n the user entered */
        MPI_Barrier(MPI_COMM_WORLD);
        /* send n to the other nodes */
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        if (n == 0)
            done = 1;
        else {
            /* the algorithm that computes pi */
            h = 1.0 / (double) n;
            sum = 0.0;
            for (i = myid + 1; i <= n; i += numprocs) {
                x = h * ((double)i - 0.5);
                sum += f(x);
            }
            mypi = h * sum;
            /* send the partial results to node 0 to be summed */
            MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
            if (myid == 0) {
                printf("pi is approximately %.16f, Error is %.16f\n",
                       pi, fabs(pi - PI25DT));
                endwtime = MPI_Wtime();
                printf("wall clock time = %f\n", endwtime - startwtime);
            }
        }
    }
    MPI_Finalize();
    return 0;
}
}}}
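* test3 implements the fan-in by hand, with node 0 posting one MPI_Recv per sender. The collective MPI_Gather does the same job in a single call, collecting every rank's fixed-size message into one buffer on the root. Here is a minimal sketch of test3 rewritten this way (our own illustration using the same 20-byte message format, not part of the original lab):
{{{
/* gather.c -- hypothetical rewrite of test3 (not from the lab): every rank
 * formats its rank into a fixed 20-byte message, and MPI_Gather collects all
 * the messages into one buffer on node 0. */
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"

#define MSGLEN 20

int main(int argc, char **argv)
{
    int myrank, numprocs, i;
    char message[MSGLEN];
    char *all = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    sprintf(message, "[%d]", myrank);
    if (myrank == 0)  /* only the root needs the receive buffer */
        all = malloc((size_t)numprocs * MSGLEN);

    /* each rank contributes MSGLEN chars; node 0 gets them in rank order */
    MPI_Gather(message, MSGLEN, MPI_CHAR, all, MSGLEN, MPI_CHAR,
               0, MPI_COMM_WORLD);

    if (myrank == 0) {
        for (i = 1; i < numprocs; i++)
            printf("[Node 0] << 「%s」[Node %d]\n", all + i*MSGLEN, i);
        free(all);
    }
    MPI_Finalize();
    return 0;
}
}}}
Unlike the Send/Recv version, the gathered buffer is always laid out in rank order, so the printing loop no longer depends on message arrival timing.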