Changes between Version 5 and Version 6 of YMU110509/Lab9


Ignore:
Timestamp:
Jun 20, 2011, 2:52:34 PM (13 years ago)
Author:
jazz
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • YMU110509/Lab9

    v5 v6  
    5555 * 接著，讓我們在本地端先驗證一下 testmapper.sh 的運作
    5656{{{
    57 ~$ head -n 10 sample-01.txt > sample-00.txt
     57~$ head -n 2 sample-01.txt > sample-00.txt
    5858~$ cat > testmapper.sh << EOF
    5959#!/bin/bash
    6060
    61 id="h998"
     61id="`whoami`"
    6262mkdir -p /tmp/\$id
    6363host=\`hostname\`
     
    6868  input=\$line
    6969  filename=\`basename \$input\`
    70   echo "\$uid@$host:\$pwd> hadoop fs -get \$input /tmp/\$id/\$filename"
    71   echo "\$uid@$host:\$pwd> velveth output-\$filename 17 -fasta -short /tmp/\$id/\$filename"
    72   echo "\$uid@$host:\$pwd> hadoop fs -put output-\$filename ."
     70  echo "\$uid@\$host:\$pwd> hadoop fs -get \$input /tmp/\$id/\$filename"
     71  echo "\$uid@\$host:\$pwd> velveth output-\$filename 17 -fasta -short /tmp/\$id/\$filename"
     72  echo "\$uid@\$host:\$pwd> hadoop fs -put output-\$filename ."
    7373done
    7474rm -rf /tmp/\$id
     
    9595== 實作透過 Hadoop Streaming 執行 99 組 velvet 運算 ==
    9696
    97  * 撰寫 velvet_mapper.pl
     97 * 撰寫 mapper.sh
    9898{{{
    99 #!perl
     99#!sh
     100#!/bin/bash
    100101
     102id="h998"
     103mkdir -p /tmp/$id
     104host=`hostname`
     105pwd=`pwd`
     106uid=`whoami`
     107
     108while read line; do
     109  input=$line
     110  filename=`basename $input`
     111  echo "$uid@$host> hadoop fs -get $input /tmp/$id/$filename"
     112  hadoop fs -get $input /tmp/$id/$filename
     113  echo "$uid@$host> velveth output-$filename 17 -fasta -short /tmp/$id/$filename"
     114  velveth output-$filename 17 -fasta -short /tmp/$id/$filename
     115  echo "$uid@$host> hadoop fs -put output-$filename /user/$id/."
     116  hadoop fs -put output-$filename /user/$id/.
     117done
     118rm -rf /tmp/$id
    101119}}}
     120 * 於本機測試 mapper.sh
     121{{{
     122~$ cat > mapper.sh << EOF
     123#!/bin/bash
     124
     125id="`whoami`"
     126mkdir -p /tmp/\$id
     127host=\`hostname\`
     128pwd=\`pwd\`
     129uid=\`whoami\`
     130
     131while read line; do
     132  input=\$line
     133  filename=\`basename \$input\`
     134  echo "\$uid@\$host> hadoop fs -get \$input /tmp/\$id/\$filename"
     135  hadoop fs -get \$input /tmp/\$id/\$filename
     136  echo "\$uid@\$host> velveth output-\$filename 17 -fasta -short /tmp/\$id/\$filename"
     137  velveth output-\$filename 17 -fasta -short /tmp/\$id/\$filename
     138  echo "\$uid@\$host> hadoop fs -put output-\$filename /user/\$id/."
     139  hadoop fs -put output-\$filename /user/\$id/.
     140done
     141rm -rf /tmp/\$id
     142EOF
     143~$ chmod a+x mapper.sh
     144~$ cat sample-00.txt | ./mapper.sh
     145~$ hadoop fs -rmr output-*
     146}}}
     147 * 接著用 hadoop streaming 來執行
     148{{{
     149~$ hadoop jar hadoop-streaming.jar -input lab9_input -output lab9_out2 -mapper mapper.sh -file mapper.sh
     150}}}