source: sample/hadoop-0.16/tw/org/nchc/demo/SequentialPageRank.java @ 246

Last change on this file since 246 was 21, checked in by waue, 16 years ago

hadoop 0.16

File size: 3.6 KB
Line 
1/*
2 * Cloud9: A MapReduce Library for Hadoop
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you
5 * may not use this file except in compliance with the License. You may
6 * obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13 * implied. See the License for the specific language governing
14 * permissions and limitations under the License.
15 */
16
17package tw.org.nchc.demo;
18
19import java.io.BufferedReader;
20import java.io.FileInputStream;
21import java.io.IOException;
22import java.io.InputStreamReader;
23import java.util.Collection;
24import java.util.List;
25
26import edu.uci.ics.jung.algorithms.cluster.WeakComponentGraphClusterer;
27import edu.uci.ics.jung.algorithms.importance.PageRank;
28import edu.uci.ics.jung.algorithms.importance.Ranking;
29import edu.uci.ics.jung.graph.DirectedSparseGraph;
30import edu.uci.ics.jung.graph.Graph;
31
32/**
33 * <p>
34 * Program that computes PageRank for a graph using the <a
35 * href="http://jung.sourceforge.net/">JUNG</a> package (2.0 alpha1). Program
36 * takes two command-line arguments: the first is a file containing the graph
37 * data, and the second is the random jump factor (a typical setting is 0.15).
38 * </p>
39 *
40 * <p>
41 * The graph should be represented as an adjacency list. Each line should have
42 * at least one token; tokens should be tab delimited. The first token
43 * represents the unique id of the source node; subsequent tokens represent its
44 * link targets (i.e., outlinks from the source node). For completeness, there
45 * should be a line representing all nodes, even nodes without outlinks (those
46 * lines will simply contain one token, the source node id).
47 * </p>
48 *
49 */
50public class SequentialPageRank {
51
52  private SequentialPageRank() {
53  }
54
55  /**
56   * Runs the program
57   */
58  public static void main(String[] args) throws IOException {
59    if (args.length != 2) {
60      System.err
61          .println("usage: SequentialPageRage [graph-adjacency-list] [random-jump-factor]");
62      System.exit(-1);
63    }
64    String infile = args[0];
65    float alpha = Float.parseFloat(args[1]);
66
67    int edgeCnt = 0;
68    DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<String, Integer>();
69
70    BufferedReader data = new BufferedReader(new InputStreamReader(
71        new FileInputStream(infile)));
72
73    String line;
74    while ((line = data.readLine()) != null) {
75      line.trim();
76      String[] arr = line.split("\\t");
77
78      for (int i = 1; i < arr.length; i++) {
79        graph.addEdge(new Integer(edgeCnt++), arr[0], arr[i]);
80      }
81    }
82
83    data.close();
84
85    WeakComponentGraphClusterer<String, Integer> clusterer = new WeakComponentGraphClusterer<String, Integer>();
86
87    Collection<Graph<String, Integer>> components = clusterer
88        .transform(graph);
89    int numComponents = components.size();
90    System.out.println("Number of components: " + numComponents);
91    System.out.println("Number of edges: " + graph.getEdgeCount());
92    System.out.println("Number of nodes: " + graph.getVertexCount());
93    System.out.println("Random jump factor: " + alpha);
94
95    PageRank<String, Integer> ranker = new PageRank<String, Integer>(graph,
96        alpha);
97    ranker.evaluate();
98
99    System.out.println("\nPageRank of nodes, in descending order:");
100    for (Ranking<?> s : (List<Ranking<?>>) ranker.getRankings()) {
101      String pmid = s.getRanked().toString();
102
103      System.out.println(pmid + " " + s.rankScore);
104    }
105  }
106
107}
Note: See TracBrowser for help on using the repository browser.