= 2010-05-10 =

== Hadoop : Matrix Multiply 用 MapReduce 作矩陣運算 ==

 * 測試 [http://homepage.mac.com/j.norstad/matrix-multiply/index.html A MapReduce Algorithm for Matrix Multiplication] 所提供之矩陣相乘範例
 * 發現會有 IOException 錯誤訊息,肇因於 /tmp/MatrixMultiply/out/_logs 是目錄而不是檔案
{{{
10/05/10 15:23:23 INFO input.FileInputFormat: Total input paths to process : 1
10/05/10 15:23:23 INFO mapred.JobClient: Running job: job_201005101012_0016
10/05/10 15:23:24 INFO mapred.JobClient: map 0% reduce 0%
10/05/10 15:23:33 INFO mapred.JobClient: map 100% reduce 0%
10/05/10 15:23:45 INFO mapred.JobClient: map 100% reduce 100%
10/05/10 15:23:47 INFO mapred.JobClient: Job complete: job_201005101012_0016
..........
Exception in thread "main" java.io.IOException: Cannot open filename /tmp/MatrixMultiply/out/_logs
        at org.apache.hadoop.hdfs.DFSClient$DFSInputStream.openInfo(DFSClient.java:1497)
}}}
 * [解法] 修改 !TestMatrixMultiply.java,並以單機 !LocalJobRunner 執行。( hadoop 0.20.2 預設用 !LocalJobRunner )
{{{
jazz@drbl:~$ wget http://ftp.twaren.net/Unix/Web/apache/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
jazz@drbl:~$ tar zxvf hadoop-0.20.2.tar.gz
jazz@drbl:~$ cd hadoop-0.20.2
jazz@drbl:~/hadoop-0.20.2$ echo "export JAVA_HOME=/usr/lib/jvm/java-6-sun/" >> conf/hadoop-env.sh
jazz@drbl:~/hadoop-0.20.2$ wget http://trac.nchc.org.tw/grid/raw-attachment/wiki/jazz/10-05-10/matrix.tar.gz
jazz@drbl:~/hadoop-0.20.2$ tar zxvf matrix.tar.gz
jazz@drbl:~/hadoop-0.20.2$ cd matrix/
jazz@drbl:~/hadoop-0.20.2/matrix$ ant
jazz@drbl:~/hadoop-0.20.2/matrix$ mv matrix.jar ../.
jazz@drbl:~/hadoop-0.20.2/matrix$ cd ..
jazz@drbl:~/hadoop-0.20.2$ bin/hadoop jar matrix.jar TestMatrixMultiply
}}}
{{{
#!diff
--- source/TestMatrixMultiply.java	2009-12-12 23:00:03.000000000 +0800
+++ matrix/src/TestMatrixMultiply.java	2010-05-11 00:25:02.000000000 +0800
@@ -72,13 +72,11 @@
         for (int i = 0; i < rowDim; i++)
             for (int j = 0; j < colDim; j++)
                 result[i][j] = 0;
-        if (fs.isFile(path)) {
-            fillMatrix(result, path);
-        } else {
-            FileStatus[] fileStatusArray = fs.listStatus(path);
-            for (FileStatus fileStatus : fileStatusArray) {
-                fillMatrix(result, fileStatus.getPath());
-            }
+        FileStatus[] fileStatusArray = fs.listStatus(path);
+        for (FileStatus fileStatus : fileStatusArray) {
+            if (fs.isFile(fileStatus.getPath())) {
+                fillMatrix(result, fileStatus.getPath());
+            }
         }
         return result;
     }
@@ -100,10 +98,13 @@
     public static void checkAnswer (int[][] A, int[][] B, int I, int K, int J)
         throws Exception
     {
+        System.out.println("......multiply(...)");
         int[][] X = multiply(A, B, I, K, J);
+        System.out.println("......readMatrix("+I+","+J+","+OUTPUT_DIR_PATH);
         int[][] Y = readMatrix(I, J, OUTPUT_DIR_PATH);
         for (int i = 0; i < I; i++) {
             for (int j = 0; j < J; j++) {
+                System.out.println("......X["+i+"]["+j+"]="+X[i][j]+", Y["+i+"]["+j+"]="+Y[i][j]);
                 if (X[i][j] != Y[i][j]) {
                     throw new Exception("Bad answer!");
                 }
@@ -135,8 +136,10 @@
         int IB, int KB, int JB)
             throws Exception
     {
+        System.out.println("...MatrixMultiply.runJob(...)");
         MatrixMultiply.runJob(conf, INPUT_PATH_A, INPUT_OATH_B, OUTPUT_DIR_PATH, TEMP_DIR_PATH,
             strategy, R1, R2, I, K, J, IB, KB, JB);
+        System.out.println("...checkAnswer(...)");
         checkAnswer(A, B, I, K, J);
     }
@@ -279,8 +282,8 @@
             System.out.println("================");
             System.out.println();
         } finally {
-            fs.delete(new Path(DATA_DIR_PATH), true);
+            //fs.delete(new Path(DATA_DIR_PATH), true);
         }
     }
}}}