wiki:mpich/2008-07-20_YM_MPI_Course

Version 1 (modified by jazz, 16 years ago) (diff)

--

  • 使用你的帳號登入: ym
    login as: ym000
    ym000@140.129.162.12's password: ******
    ym000@bio001:~$ ssh-keygen -t rsa
    
  • 產生 SSH 認證金鑰
    Generating public/private rsa key pair.
    Enter file in which to save the key (/home/ym000/.ssh/id_rsa):
    Created directory '/home/ym000/.ssh'.
    Enter passphrase (empty for no passphrase):
    Enter same passphrase again:
    Your identification has been saved in /home/ym000/.ssh/id_rsa.
    Your public key has been saved in /home/ym000/.ssh/id_rsa.pub.
    The key fingerprint is:
    2a:6c:05:f8:24:38:db:79:b9:4f:0c:74:da:c5:16:05 ym000@bio001
    
  • 進行金鑰交換
    ym000@bio001:~$ cp .ssh/id_rsa.pub .ssh/authorized_keys
    
    • [備註] 因為是 DRBL 環境, 因此每一台都已經有 .ssh/authorized_keys, 如果不是 DRBL 環境, 你必須要自己手動把 .ssh/id_rsa.pub 拷貝到每一台 Compute Node 的 .ssh/authorized_keys
      ym000@bio001:~$ for ((i=2;i<=7;i++)); do scp .ssh/id_rsa.pub ym000@192.168.129.$i:.ssh/authorized_keys ; done
      
  • 設定 MPD 設定檔跟 MPI 的執行檔路徑
    ym000@bio001:~$ echo "MPD_SECRETWORD=${user}$$" > ~/.mpd.conf
    ym000@bio001:~$ chmod 600 .mpd.conf
    ym000@bio001:~$ for ((i=2;i<=7;i++)); do echo "192.168.129.$i" >> mpd.hosts; done
    ym000@bio001:~$ export PATH=$PATH:/opt/mpich2/bin
    ym000@bio001:~$ which mpdboot
    /opt/mpich2/bin/mpdboot
    
  • 設定 dsh (distributed shell)*, 我們可以使用 dsh 指令逐台執行.
    ym000@bio001:~$ mkdir -p .dsh/
    ym000@bio001:~$ cp mpd.hosts .dsh/machines.list
    ym000@bio001:~$ dsh -a hostname
    bio001002
    bio001003
    The authenticity of host '192.168.129.4 (192.168.129.4)' can't be established.
    RSA key fingerprint is f0:4b:6f:52:3c:0b:f4:8b:1c:a0:33:4a:e2:15:e0:5a.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.4' (RSA) to the list of known hosts.
    bio001004
    The authenticity of host '192.168.129.5 (192.168.129.5)' can't be established.
    RSA key fingerprint is 09:9b:25:5e:9c:a8:9a:dd:35:ee:f0:54:6a:11:b7:90.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.5' (RSA) to the list of known hosts.
    bio001005
    The authenticity of host '192.168.129.6 (192.168.129.6)' can't be established.
    RSA key fingerprint is 40:10:50:38:2c:f0:0b:f7:11:85:a3:41:d9:fb:ac:7d.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.6' (RSA) to the list of known hosts.
    bio001006
    The authenticity of host '192.168.129.7 (192.168.129.7)' can't be established.
    RSA key fingerprint is 07:95:b7:f8:a5:9c:c7:21:84:d0:5b:f4:5f:db:0b:a6.
    Are you sure you want to continue connecting (yes/no)? yes
    Warning: Permanently added '192.168.129.7' (RSA) to the list of known hosts.
    bio001007
    ym000@bio001:~$ dsh -a hostname
    bio001002
    bio001003
    bio001004
    bio001005
    bio001006
    bio001007
    
  • 用使用者的身分執行 mpd
    ym000@bio001:~$ mpdboot -n 7
    
  • 用 mpdtrace 檢查 mpd 執行狀態
    ym000@bio001:~$ mpdtrace 
    bio001
    bio001005
    bio001004
    bio001003
    bio001002
    bio001007
    bio001006
    
  • 用 mpdringtest 做 mpd 訊息傳遞效能測試
    ym000@bio001:~$ mpdringtest 1000
    time for 1000 loops = 0.648007154465 seconds
    
  • 用 mpiexec 執行 cpi 範例程式
    ym000@bio001:~$ mpiexec -n 3 /opt/mpich2/share/mpich2/examples/cpi
    Process 0 of 1 is on bio001
    pi is approximately 3.1415926544231341, Error is 0.0000000008333410
    wall clock time = 0.000284
    Process 0 of 1 is on bio001
    pi is approximately 3.1415926544231341, Error is 0.0000000008333410
    wall clock time = 0.000295
    Process 0 of 1 is on bio001
    pi is approximately 3.1415926544231341, Error is 0.0000000008333410
    wall clock time = 0.000294
    
  • 貼上 test1.c
    ym000@bio001:~$ cat << EOF > test1.c
    > #include <stdio.h>
    > #include <mpi.h>
    > main (int argc, char **argv)
    > {
    >   int rank, size, len;
    >   char name[MPI_MAX_PROCESSOR_NAME];
    >   MPI_Init(&argc, &argv);
    >   int myid, numprocs;
    >
    >   /* 取得 node 總數 */
    >   MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
    >   /* 取得本身 node id / rank */
    >   MPI_Comm_rank(MPI_COMM_WORLD,&myid);
    >   /* 取得本身 host name */
    >   MPI_Get_processor_name(name, &len);
    >   printf("This is machine %d of %d name = %s\n", myid, numprocs, name);
    >
    >   MPI_Finalize();
    > }
    > EOF
    
  • 用 mpicc 編譯 test1.c
    ym000@bio001:~$ mpicc -o test1 test1.c
    
  • 用 mpiexec 執行 test1 程式
    ym000@bio001:~$ mpiexec -n 1 ./test1
    This is machine 0 of 1 name = bio001
    ym000@bio001:~$ mpiexec -n 12 ./test1
    This is machine 0 of 12 name = bio001
    This is machine 1 of 12 name = bio001004
    This is machine 2 of 12 name = bio001005
    This is machine 3 of 12 name = bio001003
    This is machine 4 of 12 name = bio001002
    This is machine 5 of 12 name = bio001007
    This is machine 6 of 12 name = bio001006
    This is machine 7 of 12 name = bio001
    This is machine 8 of 12 name = bio001004
    This is machine 11 of 12 name = bio001002
    This is machine 9 of 12 name = bio001005
    This is machine 10 of 12 name = bio001003
    
  • here is test1.c
    #include <stdio.h>
    #include <mpi.h>
    /* Print, from every MPI process, its rank, the total process count,
     * and the host it is running on. */
    int main (int argc, char **argv)
    {
      int len;                                /* actual length of the processor name */
      char name[MPI_MAX_PROCESSOR_NAME];
      int myid, numprocs;

      MPI_Init(&argc, &argv);

      /* total number of MPI processes */
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      /* rank (id) of this process */
      MPI_Comm_rank(MPI_COMM_WORLD, &myid);
      /* host name this process runs on */
      MPI_Get_processor_name(name, &len);
      printf("This is machine %d of %d name = %s\n", myid, numprocs, name);

      MPI_Finalize();
      return 0;
    }
    
  • here is test2.c
    #include <mpi.h>
    #include <stdio.h>
    /* Token ring demo: node 0 sends its rank to node 1, each middle node
     * receives from the previous node and forwards its own rank to the
     * next one, and the last node only receives.
     * NOTE(review): assumes at least 2 processes — with -n 1 the send to
     * rank 1 has no matching receiver. */
    int main(int argc, char **argv) {
      int n, myrank, numprocs;
      MPI_Status status;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

      /* node 0 starts the ring by sending the first message */
      if (myrank == 0) {
        n = myrank;
        MPI_Send(&n, 1, MPI_INT, 1, 99, MPI_COMM_WORLD);
        printf("[Node %d]「%d」 >> [Node %d]\n\n", myrank, n, myrank+1);
      }

      /* nodes 1 .. numprocs-2 receive from the previous node, then forward */
      if (myrank > 0 && myrank < numprocs-1) {
        MPI_Recv(&n, 1, MPI_INT, myrank-1, 99, MPI_COMM_WORLD, &status);
        printf("[Node %d] << 「%d」[Node %d]\n", myrank, n, status.MPI_SOURCE);
        n = myrank;
        MPI_Send(&n, 1, MPI_INT, myrank+1, 99, MPI_COMM_WORLD);
        printf("[Node %d]「%d」 >> [Node %d]\n\n", myrank, n, myrank+1);
      }

      /* the final node (numprocs-1) does not send, only receives */
      if (myrank == numprocs-1) {
        MPI_Recv(&n, 1, MPI_INT, myrank-1, 99, MPI_COMM_WORLD, &status);
        printf("[Node %d] << 「%d」[Node %d]\n", myrank, n, status.MPI_SOURCE);
      }

      MPI_Finalize();
      return 0;
    }
    
  • here is test3.c
    /* Program:
     *   每個 node 將訊息傳送給 node 0,由 node 0 統一印出
     * History:
     *   2008-06-12 BETA
     *   2008-06-17 更改顯示方式,並增加註解
     */
    
    #include <stdio.h>
    #include <mpi.h>
    #include <string.h>
    
    /* Gather demo: every non-zero node sends a short text message to
     * node 0; node 0 receives one message from each node in rank order
     * and prints them. */
    int main(int argc, char **argv)
    {
      int myrank, i, numprocs;
      char message[20];
      MPI_Status status;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

      /* node 0: receive one message from every other node, rank by rank */
      if (myrank == 0)
      {
        for (i = 1; i < numprocs; i++)
        {
          MPI_Recv(message, 20, MPI_CHAR, i, 99, MPI_COMM_WORLD, &status);
          printf("[Node 0] << 「%s」[Node %d] \n", message, status.MPI_SOURCE);
        }
      }

      /* all other nodes: send this node's rank (as text) to node 0 */
      if (myrank != 0)
      {
        sprintf(message, "[%d]", myrank);
        MPI_Send(message, 20, MPI_CHAR, 0, 99, MPI_COMM_WORLD);
        printf("[Node %d]「%s」 >> [Node 0]\n", myrank, message);
      }

      MPI_Finalize();
      return 0;
    }
    
  • here is test4
    /* Program:
     *   讓 node 0 可以接受來自任何 node 的訊息,每個 node 將訊息標上不同 tag 後傳給 node 0
     * History:
     *   2008-06-24 BETA
     */
    
    #include <stdio.h>
    #include <mpi.h>
    
    /* Wildcard-receive demo: every non-zero node sends 5 tagged integers
     * to node 0; node 0 accepts them from any source with any tag and
     * reports source, tag, and payload. */
    int main (int argc, char **argv)
    {
      int numprocs, myrank, i, buf;
      MPI_Status status;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

      /* every node except node 0 sends 5 messages to node 0,
       * using the loop index i as the message tag */
      if (myrank > 0)
      {
        for (i = 0; i < 5; i++)
        {
          buf = myrank * 100 + i;   /* payload encodes sender rank and sequence */
          MPI_Send(&buf, 1, MPI_INT, 0, i, MPI_COMM_WORLD);
        }
      }

      /* node 0 collects all 5*(numprocs-1) messages */
      if (myrank == 0)
      {
        for (i = 0; i < 5*(numprocs-1); i++)
        {
          /* MPI_ANY_SOURCE: accept from any node; MPI_ANY_TAG: accept any tag */
          MPI_Recv(&buf, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
          printf("[Node %d][Tag %d] => %d\n", status.MPI_SOURCE, status.MPI_TAG, buf);
        }
      }

      MPI_Finalize();
      return 0;
    }