#!/bin/bash
#
# Install openPBS / TORQUE on a DRBL server and enable pbs_mom on the DRBL
# clients.
#
# Steps:
#   1. Download, build and install TORQUE (skipped when the tarball is already
#      present in the current directory).
#   2. Build the node list from the "host" entries of the DHCP configuration
#      and create the pbs_server database with a default "batch" queue.
#   3. Write the pbs_mom configuration.
#   4. Re-deploy DRBL, copy the TORQUE files to the clients and (re)start the
#      services.
#
# Usage: run from the directory that should hold the TORQUE tarball/sources.
# Requires: sudo, wget, tar, a build toolchain, and DRBL under /opt/drbl.

readonly TORQUE_VERSION="2.3.6"
readonly TORQUE_TARBALL="torque-${TORQUE_VERSION}.tar.gz"
readonly TORQUE_URL="http://www.clusterresources.com/downloads/torque/${TORQUE_TARBALL}"
readonly TORQUE_HOME="/var/spool/torque"
readonly DHCPD_CONF="/etc/dhcp3/dhcpd.conf"
readonly DRBL_SBIN="/opt/drbl/sbin"
readonly DRBL_BIN="/opt/drbl/bin"

# Scratch directory for generated files; set in main, removed on exit.
WORK_DIR=""

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory (EXIT trap).
cleanup() {
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "${WORK_DIR:?}"
  fi
}

########
# Install openPBS / torque
########
# Downloads, builds and installs TORQUE, then installs the Debian init.d
# scripts shipped in the source tree. Aborts on the first failing step.
install_torque() {
  if ! wget "$TORQUE_URL"; then
    # Do not leave a partial download behind: its mere presence would make
    # the next run skip the installation.
    rm -f -- "$TORQUE_TARBALL"
    die "failed to download ${TORQUE_URL}"
  fi
  tar zxvf "$TORQUE_TARBALL" || die "failed to extract ${TORQUE_TARBALL}"

  # Build in a subshell so the caller's working directory is not changed.
  (
    cd "torque-${TORQUE_VERSION}/" || exit 1
    ./configure || exit 1
    sudo make || exit 1
    sudo make install || exit 1
    # to make sure libtorque.so.2 is updated
    # or there will be an error message:
    #
    #   pbs_mom: error while loading shared libraries: libtorque.so.2:
    #   cannot open shared object file: No such file or directory
    #
    sudo ldconfig || exit 1
    # create init.d scripts for torque
    sudo cp contrib/init.d/debian.pbs_mom /etc/init.d/pbs_mom || exit 1
    sudo cp contrib/init.d/debian.pbs_sched /etc/init.d/pbs_sched || exit 1
    sudo cp contrib/init.d/debian.pbs_server /etc/init.d/pbs_server || exit 1
  ) || die "failed to build/install torque-${TORQUE_VERSION}"
}

# Generate the node list, create the pbs_server database and load the queue
# and server settings through qmgr.
# Arguments: $1 - scratch directory for generated files
configure_server() {
  local work_dir=$1
  local nodes_file="${work_dir}/nodes"
  local qmgr_file="${work_dir}/torque_conf"
  local node

  # Every 'host NAME {' entry in the DHCP configuration becomes a node.
  sudo grep 'host .* {' "$DHCPD_CONF" | awk '{ print $2 }' > "$nodes_file"
  if [[ ! -s "$nodes_file" ]]; then
    printf 'warning: no host entries found in %s\n' "$DHCPD_CONF" >&2
  fi
  sudo cp -- "$nodes_file" "${TORQUE_HOME}/server_priv/nodes"
  sudo pbs_server -t create

  cat > "$qmgr_file" << 'EOF'
create queue batch
set queue batch queue_type = Execution
set queue batch started = True
set queue batch enabled = True
set server default_queue = batch
set server resources_default.nodes = 1
set server scheduling = True
EOF
  # Allow every node to submit jobs. The local copy of the node list is read
  # so this does not depend on server_priv being readable by this user.
  while IFS= read -r node; do
    [[ -n "$node" ]] || continue
    printf 'set server submit_hosts += %s\n' "$node"
  done < "$nodes_file" >> "$qmgr_file"
  sudo qmgr < "$qmgr_file"
}

# Write the pbs_mom configuration pointing at this host as the server.
# Arguments: $1 - scratch directory for generated files
configure_mom() {
  local work_dir=$1
  local mom_config="${work_dir}/config"

  # The delimiter is unquoted on purpose: $(hostname) is expanded, while the
  # escaped \$ keeps the literal '$' that starts each pbs_mom directive.
  cat > "$mom_config" << EOF
\$pbsserver $(hostname)
\$logevent 255
\$usecp *:/home /home
EOF
  # pbs_mom reads mom_priv/config; mom_priv/jobs/ is where job files live.
  sudo cp -- "$mom_config" "${TORQUE_HOME}/mom_priv/config"
}

# Push the configuration to the DRBL clients and (re)start all services.
deploy_and_start() {
  local rel_path

  # terminate pbs_server and pbs_sched
  sudo qterm -t quick
  sudo /etc/init.d/pbs_sched stop
  # re-deploy DRBL
  sudo "${DRBL_SBIN}/drblpush" -c /etc/drbl/drblpush.conf
  # copy torque related configuration files for pbs_mom
  for rel_path in server_priv/nodes mom_priv/config pbs_environment server_name; do
    sudo "${DRBL_SBIN}/drbl-cp-host" \
      "${TORQUE_HOME}/${rel_path}" "${TORQUE_HOME}/${rel_path}"
  done
  # enable pbs_mom service for DRBL Clients
  sudo "${DRBL_SBIN}/drbl-client-service" pbs_mom on
  # enable pbs_sched and pbs_server services for DRBL Server
  sudo update-rc.d pbs_sched defaults
  sudo update-rc.d pbs_server defaults
  # sync. cluster time
  sudo "${DRBL_BIN}/drbl-doit" ntpdate 0.debian.pool.ntp.org
  # restart pbs_sched and pbs_server
  sudo /etc/init.d/pbs_sched start
  sudo /etc/init.d/pbs_server start
  # start pbs_mom
  sudo "${DRBL_BIN}/drbl-doit" /etc/init.d/pbs_mom start
}

main() {
  WORK_DIR=$(mktemp -d) || die "mktemp failed"
  trap cleanup EXIT

  # The presence of the tarball marks TORQUE as already installed.
  if [[ ! -e "$TORQUE_TARBALL" ]]; then
    install_torque
  fi

  configure_server "$WORK_DIR"
  configure_mom "$WORK_DIR"
  deploy_and_start

  # check if /etc/hosts has more than one entry for this hostname
  # (printf emits the real ESC byte the green colour codes need)
  printf '\033[32m please check if /etc/hosts have lots of hostname\033[0m\n'
  grep -- "$(hostname)" /etc/hosts
}

main "$@"