wiki:mpich/2008-07-20_YM_MPI_Course

Version 2 (modified by jazz, 16 years ago) (diff)

--

  • 使用你的帳號登入: ym
    login as: ym000
    ym000@140.129.162.12's password: ******
    ym000@bio001:~$ ssh-keygen -t rsa
    
  • 產生 SSH 認證金鑰
    Generating public/private rsa key pair.
    Enter file in which to save the key (/home/ym000/.ssh/id_rsa):
    Created directory '/home/ym000/.ssh'.
    Enter passphrase (empty for no passphrase):
    Enter same passphrase again:
    Your identification has been saved in /home/ym000/.ssh/id_rsa.
    Your public key has been saved in /home/ym000/.ssh/id_rsa.pub.
    The key fingerprint is:
    2a:6c:05:f8:24:38:db:79:b9:4f:0c:74:da:c5:16:05 ym000@bio001
    
  • 進行金鑰交換
    ym000@bio001:~$ cp .ssh/id_rsa.pub .ssh/authorized_keys
    
    • [備註] 因為是 DRBL 環境, 因此每一台都已經有 .ssh/authorized_keys, 如果不是 DRBL 環境, 你必須要自己手動把 .ssh/id_rsa.pub 拷貝到每一台 Compute Node 的 .ssh/authorized_keys
      ym000@bio001:~$ for ((i=2;i<=7;i++)); do scp .ssh/id_rsa.pub ym000@192.168.129.$i:.ssh/authorized_keys ; done
      
  • 設定 MPD 設定檔跟 MPI 的執行檔路徑
    ym000@bio001:~$ echo "MPD_SECRETWORD=${USER}$$" > ~/.mpd.conf
    ym000@bio001:~$ chmod 600 .mpd.conf
    ym000@bio001:~$ for ((i=2;i<=7;i++)); do echo "192.168.129.$i" >> mpd.hosts; done
    ym000@bio001:~$ export PATH=$PATH:/opt/mpich2/bin
    ym000@bio001:~$ which mpdboot
    /opt/mpich2/bin/mpdboot
    
  • 設定 dsh (distributed shell)*, 我們可以使用 dsh 指令逐台執行.
    ym000@bio001:~$ mkdir -p .dsh/
    ym000@bio001:~$ cp mpd.hosts .dsh/machines.list
    ym000@bio001:~$ dsh -a hostname
    bio001002
    bio001003
    The authenticity of host '192.168.129.4 (192.168.129.4)' can't be established.
    RSA key fingerprint is f0:4b:6f:52:3c:0b:f4:8b:1c:a0:33:4a:e2:15:e0:5a.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.4' (RSA) to the list of known hosts.
    bio001004
    The authenticity of host '192.168.129.5 (192.168.129.5)' can't be established.
    RSA key fingerprint is 09:9b:25:5e:9c:a8:9a:dd:35:ee:f0:54:6a:11:b7:90.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.5' (RSA) to the list of known hosts.
    bio001005
    The authenticity of host '192.168.129.6 (192.168.129.6)' can't be established.
    RSA key fingerprint is 40:10:50:38:2c:f0:0b:f7:11:85:a3:41:d9:fb:ac:7d.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.6' (RSA) to the list of known hosts.
    bio001006
    The authenticity of host '192.168.129.7 (192.168.129.7)' can't be established.
    RSA key fingerprint is 07:95:b7:f8:a5:9c:c7:21:84:d0:5b:f4:5f:db:0b:a6.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.7' (RSA) to the list of known hosts.
    bio001007
    ym000@bio001:~$ dsh -a hostname
    bio001002
    bio001003
    bio001004
    bio001005
    bio001006
    bio001007
    
  • 用使用者的身分執行 mpd
    ym000@bio001:~$ mpdboot -n 7
    
  • 用 mpdtrace 檢查 mpd 執行狀態
    ym000@bio001:~$ mpdtrace 
    bio001
    bio001005
    bio001004
    bio001003
    bio001002
    bio001007
    bio001006
    
  • 用 mpdringtest 做 mpd 訊息傳遞效能測試
    ym000@bio001:~$ mpdringtest 1000
    time for 1000 loops = 0.648007154465 seconds
    
  • 用 mpiexec 執行 cpi 範例程式
    ym000@bio001:~$ mpiexec -n 3 /opt/mpich2/share/mpich2/examples/cpi
    Process 0 of 1 is on bio001
    pi is approximately 3.1415926544231341, Error is 0.0000000008333410
    wall clock time = 0.000284
    Process 0 of 1 is on bio001
    pi is approximately 3.1415926544231341, Error is 0.0000000008333410
    wall clock time = 0.000295
    Process 0 of 1 is on bio001
    pi is approximately 3.1415926544231341, Error is 0.0000000008333410
    wall clock time = 0.000294
    
  • 貼上 test1.c
    ym000@bio001:~$ cat << EOF > test1.c
    > #include <stdio.h>
    > #include <mpi.h>
    > main (int argc, char **argv)
    > {
    >   int rank, size, len;
    >   char name[MPI_MAX_PROCESSOR_NAME];
    >   MPI_Init(&argc, &argv);
    >   int myid, numprocs;
    >
    >   /* 取得 node 總數 */
    >   MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
    >   /* 取得本身 node id / rank */
    >   MPI_Comm_rank(MPI_COMM_WORLD,&myid);
    >   /* 取得本身 host name */
    >   MPI_Get_processor_name(name, &len);
    >   printf("This is machine %d of %d name = %s\n", myid, numprocs, name);
    >
    >   MPI_Finalize();
    > }
    > EOF
    
  • 用 mpicc 編譯 test1.c
    ym000@bio001:~$ mpicc -o test1 test1.c
    
  • 用 mpiexec 執行 test1 程式
    ym000@bio001:~$ mpiexec -n 1 ./test1
    This is machine 0 of 1 name = bio001
    ym000@bio001:~$ mpiexec -n 12 ./test1
    This is machine 0 of 12 name = bio001
    This is machine 1 of 12 name = bio001004
    This is machine 2 of 12 name = bio001005
    This is machine 3 of 12 name = bio001003
    This is machine 4 of 12 name = bio001002
    This is machine 5 of 12 name = bio001007
    This is machine 6 of 12 name = bio001006
    This is machine 7 of 12 name = bio001
    This is machine 8 of 12 name = bio001004
    This is machine 11 of 12 name = bio001002
    This is machine 9 of 12 name = bio001005
    This is machine 10 of 12 name = bio001003
    
  • here is test1.c
    #include <stdio.h>
    #include <mpi.h>
    /*
     * test1: every process prints its rank, the total number of processes
     * and the name of the host it is running on.
     *
     * Returns 0 after MPI_Finalize().
     */
    int main(int argc, char **argv)
    {
      int len;                              /* length written by MPI_Get_processor_name */
      char name[MPI_MAX_PROCESSOR_NAME];
      int myid, numprocs;

      MPI_Init(&argc, &argv);

      /* total number of processes in MPI_COMM_WORLD */
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      /* rank (id) of this process */
      MPI_Comm_rank(MPI_COMM_WORLD, &myid);
      /* host name of the node this process runs on */
      MPI_Get_processor_name(name, &len);
      printf("This is machine %d of %d name = %s\n", myid, numprocs, name);

      MPI_Finalize();
      return 0;
    }
    
  • here is test2.c
    #include <mpi.h>
    #include <stdio.h>
    /*
     * test2: pass an integer down a chain of ranks 0 -> 1 -> ... -> numprocs-1.
     * Every rank except the first receives from its predecessor, and every
     * rank except the last sends its own rank number to its successor
     * (message tag 99).
     *
     * The chain needs at least two processes. With fewer, rank 0 would have
     * to send to rank 1 and receive from rank -1, neither of which exists,
     * so the program reports the problem on stderr and returns 1 instead.
     *
     * Returns 0 on success.
     */
    int main(int argc, char **argv) {
      int n, myrank, numprocs;
      MPI_Status status;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

      if (numprocs < 2) {
        fprintf(stderr, "test2 needs at least 2 processes (got %d)\n", numprocs);
        MPI_Finalize();
        return 1;
      }

      /* every node except node 0 first waits for the message from the previous node */
      if (myrank > 0) {
        MPI_Recv(&n, 1, MPI_INT, myrank - 1, 99, MPI_COMM_WORLD, &status);
        printf("[Node %d] << 「%d」[Node %d]\n", myrank, n, status.MPI_SOURCE);
      }

      /* every node except the final node n-1 then sends its rank to the next node */
      if (myrank < numprocs - 1) {
        n = myrank;
        MPI_Send(&n, 1, MPI_INT, myrank + 1, 99, MPI_COMM_WORLD);
        printf("[Node %d]「%d」 >> [Node %d]\n\n", myrank, n, myrank + 1);
      }

      MPI_Finalize();
      return 0;
    }
    
  • here is test3.c
    /* Program:
     *   每個 node 將訊息傳送給 node 0,由 node 0 統一印出
     * History:
     *   2008-06-12 BETA
     *   2008-06-17 更改顯示方式,並增加註解
     */
    
    #include <stdio.h>
    #include <mpi.h>
    #include <string.h>
    
    /*
     * test3: every rank other than 0 formats its rank as the string "[<rank>]"
     * and sends it to rank 0 (tag 99); rank 0 receives one message from each
     * other rank, in rank order, and prints it.
     *
     * Returns 0 after MPI_Finalize().
     */
    int main(int argc, char **argv)
    {
      int myrank, i, numprocs;
      char message[20];
      MPI_Status status;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

      if (myrank == 0)
      {
        /* Node 0: receive one message from each of the other nodes */
        for (i = 1; i < numprocs; i++)
        {
          /* the count is the capacity of the buffer; shorter messages are accepted */
          MPI_Recv(message, (int)sizeof message, MPI_CHAR, i, 99,
                   MPI_COMM_WORLD, &status);
          printf("[Node 0] << 「%s」[Node %d] \n", message, status.MPI_SOURCE);
        }
      }
      else
      {
        /* other nodes: send this node's rank to Node 0 */
        snprintf(message, sizeof message, "[%d]", myrank);
        /* send only the text plus its terminating NUL, not the
         * uninitialised remainder of the buffer */
        MPI_Send(message, (int)strlen(message) + 1, MPI_CHAR, 0, 99,
                 MPI_COMM_WORLD);
        printf("[Node %d]「%s」 >> [Node 0]\n", myrank, message);
      }

      MPI_Finalize();
      return 0;
    }
    
  • here is test4
    /* Program:
     *   mpich_example 內建範例,計算 pi 。
     * History:
     *   2008-04-11 BETA
     *   2008-06-19 增加可重複輸入欲計算之精準度
     *   2008-06-23 加入 MPI_Barrier 以確保每個 node 在接受 n 後才執行
     */
    
    #include "mpi.h"
    #include <stdio.h>
    #include <math.h>
    #include <time.h>
    
    /* Integrand used for the pi estimate: returns 4 / (1 + a*a). */
    double f( double );
    double f( double a )
    {
        const double denom = 1.0 + a * a;

        return 4.0 / denom;
    }
    
    /*
     * test4: interactive pi calculation, based on the MPICH "cpi" example.
     *
     * Rank 0 repeatedly asks for a number of intervals n and broadcasts it.
     * Every rank sums f() at the midpoints of the intervals assigned to it
     * (i = myid+1, myid+1+numprocs, ...), and the partial results are added
     * up on rank 0, which prints the estimate, its error and the elapsed
     * wall-clock time.
     *
     * Entering 0 quits. End of input, unparsable input and negative values
     * are treated as 0 so that the loop always terminates.
     *
     * Returns 0 after MPI_Finalize().
     */
    int main( int argc, char *argv[])
    {
        int done = 0, n = 0, myid, numprocs, i;
        const double PI25DT = 3.141592653589793238462643;
        double mypi, pi, h, sum, x;
        double startwtime = 0.0, endwtime;
        int  namelen;
        char processor_name[MPI_MAX_PROCESSOR_NAME];

        MPI_Init(&argc,&argv);
        MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
        MPI_Comm_rank(MPI_COMM_WORLD,&myid);
        MPI_Get_processor_name(processor_name,&namelen);
        fprintf(stderr,"Process %d on %s\n",
                myid, processor_name);

        while (!done)
        {
            /* node 0 reads the value entered by the user */
            if (myid == 0)
            {
                printf("Enter the number of intervals: (0 quits) ");
                /* make sure the prompt is visible before blocking on input */
                fflush(stdout);
                /* without this check a failed read would silently reuse
                 * the previous n and the loop would never end */
                if (scanf("%d", &n) != 1 || n < 0)
                {
                    n = 0;
                }
                startwtime = MPI_Wtime();
            }

            /* all nodes synchronise here before n is distributed.
             * NOTE(review): MPI_Bcast is itself a collective call, so this
             * barrier is probably redundant; kept from the original. */
            MPI_Barrier(MPI_COMM_WORLD);

            /* send n from node 0 to the other nodes */
            MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
            if (n == 0)
            {
                done = 1;
            }
            else
            {
                /* the pi algorithm: midpoint-rule sum of f over n intervals
                 * of width h, split across the ranks */
                h   = 1.0 / (double) n;
                sum = 0.0;
                for (i = myid + 1; i <= n; i += numprocs)
                {
                    x = h * ((double)i - 0.5);
                    sum += f(x);
                }
                mypi = h * sum;

                /* add up the partial results on node 0 */
                MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
                if (myid == 0)
                {
                    printf("pi is approximately %.16f, Error is %.16f\n",
                           pi, fabs(pi - PI25DT));
                    endwtime = MPI_Wtime();
                    printf("wall clock time = %f\n",
                           endwtime-startwtime);
                }
            }
        }
        MPI_Finalize();
        return 0;
    }