= icas 0.1 completed =

{{{
#!java
/**
 * Program SnortProduce.java
 * Editor: Waue Chen
 * From : GTD. NCHC. Taiwan
 * Last Update Date: 07/29/2009
 * support version : hadoop 0.18.3, hbase 0.18.1
 */

package tw.org.nchc.code;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ICAS extends Configured implements Tool {

    HBaseConfiguration hbase_conf;
    HBaseAdmin hbase_admin;

    public ICAS() throws IOException {
        hbase_conf = new HBaseConfiguration();
        hbase_admin = new HBaseAdmin(hbase_conf);
    }

    public static class ICAS_M extends MapReduceBase implements
            Mapper<LongWritable, Text, Text, Text> {

        // strip the port from an "ip:port" string
        String getip(String str) {
            String[] ret = str.split(":");
            return ret[0];
        }

        public void map(LongWritable key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // fields: [3] sig [4] class [5] year [6] month [7] day [8] hour
            // [9] minute [10] second [11] source ip [12] dest ip [13] type

            String line = value.toString();
            String[] str = line.split(";");
            String source_ip = getip(str[11]);
            String dest_ip = getip(str[12]);
            String fkey = dest_ip;

            String date = str[5] + "/" + str[6] + "/" + str[7] + "_" + str[8]
                    + ":" + str[9] + ":" + str[10];
            // value layout: source @ date @ sig @ class @ type
            String fvalue = source_ip + "@" + date + "@" + str[3] + "@"
                    + str[4] + "@" + str[13];
            output.collect(new Text(fkey), new Text(fvalue));
        }
    }
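    // Illustrative only (not from the original source): given a hypothetical
    // record such as
    //   "0;1;2;SIG;CLASS;2009;07;29;12;30;45;10.1.1.1:1234;192.168.0.9:80;TCP"
    // the map() above emits
    //   key   = "192.168.0.9"
    //   value = "10.1.1.1@2009/07/29_12:30:45@SIG@CLASS@TCP"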

    public static class ICAS_R extends TableReduce<Text, Text> {

        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
                Reporter reporter) throws IOException {
            HTable table = new HTable("ICAS");
            String source_ip;
            String date;
            String sig_class;
            String type;
            String signature;

            // read the first value; Text.getBytes() returns the raw backing
            // byte array (toString() would be the safer conversion)
            String rawstr = new String(values.next().getBytes());
            String[] str = rawstr.split("@");
            source_ip = str[0];
            date = str[1];
            signature = str[2];
            sig_class = str[3];
            type = str[4];
            // append the remaining values, each laid out as
            // source_ip + "@" + date + "@" + sig + "@" + class + "@" + type
            while (values.hasNext()) {
                rawstr = new String(values.next().getBytes());
                str = rawstr.split("@");
                source_ip = source_ip + " ; " + str[0];
                date = date + " ; " + str[1];
                signature = signature + ";" + str[2];
            }
            reporter.setStatus("emitting cell for row '" + key.toString()
                    + "'");
            BatchUpdate map = new BatchUpdate(key.toString());
            // map.setTimestamp(timestamp);
            map.put("infor:source_ip", Bytes.toBytes(source_ip));
            map.put("infor:signature", Bytes.toBytes(signature));
            map.put("infor:date", Bytes.toBytes(date));
            map.put("infor:class", Bytes.toBytes(sig_class));
            map.put("infor:type", Bytes.toBytes(type));
            table.commit(map);
            // ImmutableBytesWritable Hkey = new ImmutableBytesWritable(Bytes
            // .toBytes(key.toString()));
            // output.collect(Hkey, map);
        }
    }

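    // Note: run() below registers ICAS_R as the job's reducer, so the
    // following ICAS_T class (a plain text reducer that simply concatenates
    // the values for each key) is not used by the job as configured.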
    public static class ICAS_T extends MapReduceBase implements
            Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {

            StringBuilder ret = new StringBuilder("\n");
            while (values.hasNext()) {
                String v = values.next().toString().trim();
                if (v.length() > 0)
                    ret.append(v + "\n");
            }
            output.collect(key, new Text(ret.toString()));
        }
    }

    /**
     * Check whether the given HBase table already exists.
     */
    public boolean checkTableExist(String table_name) throws IOException {
        if (!hbase_admin.tableExists(table_name)) {
            return false;
        }
        return true;
    }

    // create an HBase table; returns true on success,
    // false if the table already exists
    public boolean createTable(String table_name, String[] column_family)
            throws IOException {
        // check whether the table already exists
        if (!checkTableExist(table_name)) {
            HTableDescriptor tableDesc = new HTableDescriptor(table_name);
            for (int i = 0; i < column_family.length; i++) {
                String cf = column_family[i];
                // check and correct the family name format "string:"
                if (!cf.endsWith(":")) {
                    column_family[i] = cf + ":";
                }
                // add the column family
                tableDesc.addFamily(new HColumnDescriptor(column_family[i]));
            }
            hbase_admin.createTable(tableDesc);
            return true;
        } else {
            return false;
        }
    }

    // copy the local source file into HDFS by shelling out to "hadoop dfs -put"
    static boolean hdfsput(String source_file, String text_tmp) {
        try {
            Process p = Runtime.getRuntime().exec(
                    "hadoop dfs -put " + source_file + " " + text_tmp);
            BufferedReader in = new BufferedReader(new InputStreamReader(p
                    .getErrorStream()));
            String err = null;
            while ((err = in.readLine()) != null) {
                System.err.println(err);
            }
            System.err.println("finish");
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    int printUsage() {
        System.out
                .println("ICAS <sourceFile> <tempFile> <tableName> <mapNumber> <reduceNumber>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    public int run(String[] args) throws Exception {

        // args: [0] sourceFile [1] tempFile [2] tableName [3] mapNumber
        // [4] reduceNumber (see printUsage above)
        Path text_path = new Path(args[1]); // input text path on HDFS
        Path output = new Path("/tmp/output"); // job output directory

        /* set up the job configuration */
        JobConf conf = new JobConf(getConf(), ICAS.class);
        conf.setJobName("SnortProduce");

        // job output key/value classes (Text/Text)
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        /* set mapper and reducer */
        conf.setMapperClass(ICAS_M.class);
        conf.setReducerClass(ICAS_R.class);
        // conf.setMapperClass(IdentityMapper.class);
        // conf.setReducerClass(IdentityReducer.class);

        /* delete the previous output dir */
        if (FileSystem.get(conf).exists(output))
            FileSystem.get(conf).delete(output, true);
        conf.setNumMapTasks(Integer.parseInt(args[3]));
        conf.setNumReduceTasks(Integer.parseInt(args[4]));

        /* set input and output paths */
        FileInputFormat.setInputPaths(conf, text_path);
        FileOutputFormat.setOutputPath(conf, output);

        /* run */
        JobClient.runJob(conf);

        return 0;
    }

    public static void main(String[] args) throws Exception {
        /* Major parameters */
        // local filesystem path, not an HDFS path
        String source_file = "/home/waue/log";
        // temporary HDFS path for the source data
        String text_tmp = "/user/waue/parsed";
        /* Minor parameter */
        String table_name = "ICAS";

        String[] argv = { source_file, text_tmp, table_name, "1", "1" };
        args = argv;

        /* build the table */
        String[] col_family = { "infor:" };
        BuildHTable build_table = new BuildHTable();
        if (!build_table.checkTableExist(args[2])) {
            if (!build_table.createTable(args[2], col_family)) {
                System.err.println("create table error !");
            }
        } else {
            System.err.println("Table \"" + args[2]
                    + "\" already exists !");
        }

        // if (!hdfsput(args[0], args[1])) {
        // System.out.println("directory already exists.");
        // }

        int res = ToolRunner.run(new Configuration(), new ICAS(), args);
        System.exit(res);
    }
}
}}}
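
The listing stores one HBase row per destination IP in the `ICAS` table, with the aggregated source IPs, dates, and signatures kept as `;`-joined strings in the `infor:` column family. The following is a minimal, self-contained sketch (not part of the original program) that replays the same string handling on two invented log records, so the resulting column values can be inspected without a running Hadoop/HBase cluster; the sample records and the class name `IcasFlowSketch` are hypothetical.

{{{
#!java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class IcasFlowSketch {

    // same helper as ICAS_M.getip(): strip the port from "ip:port"
    static String getip(String s) {
        return s.split(":")[0];
    }

    public static void main(String[] args) {
        // invented sample records, laid out like the fields used in ICAS_M.map()
        String[] lines = {
                "0;1;2;SIG_A;attempted-recon;2009;07;29;10;01;02;10.1.1.1:3345;192.168.0.9:80;TCP",
                "0;1;2;SIG_B;web-application-attack;2009;07;29;10;05;40;10.1.1.2:4456;192.168.0.9:80;TCP" };

        // "map" phase: key = destination IP, value = source@date@sig@class@type
        Map<String, List<String>> grouped = new HashMap<String, List<String>>();
        for (String line : lines) {
            String[] f = line.split(";");
            String date = f[5] + "/" + f[6] + "/" + f[7] + "_" + f[8] + ":"
                    + f[9] + ":" + f[10];
            String value = getip(f[11]) + "@" + date + "@" + f[3] + "@" + f[4]
                    + "@" + f[13];
            String key = getip(f[12]);
            if (!grouped.containsKey(key))
                grouped.put(key, new ArrayList<String>());
            grouped.get(key).add(value);
        }

        // "reduce" phase: concatenate per-key values the way ICAS_R does
        for (Map.Entry<String, List<String>> e : grouped.entrySet()) {
            Iterator<String> it = e.getValue().iterator();
            String[] first = it.next().split("@");
            String source_ip = first[0], date = first[1], signature = first[2];
            String sig_class = first[3], type = first[4];
            while (it.hasNext()) {
                String[] s = it.next().split("@");
                source_ip += " ; " + s[0];
                date += " ; " + s[1];
                signature += ";" + s[2];
            }
            System.out.println("row key         : " + e.getKey());
            System.out.println("infor:source_ip : " + source_ip);
            System.out.println("infor:date      : " + date);
            System.out.println("infor:signature : " + signature);
            System.out.println("infor:class     : " + sig_class);
            System.out.println("infor:type      : " + type);
        }
    }
}
}}}

For the two sample records this prints a single row keyed by `192.168.0.9`, with `infor:source_ip` = `10.1.1.1 ; 10.1.1.2`, `infor:signature` = `SIG_A;SIG_B`, and `infor:class` / `infor:type` holding only the first record's values, which matches what ICAS_R commits via BatchUpdate.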