/**
 * Program: SnortBase.java
 * Editor: Waue Chen 
 * From :  NCHC. Taiwan
 * Last Update Date: 07/02/2008
 */
/**
 * Purpose : 
 * 	This program will parse Snort alert records (';'-separated fields)
 * 	and store them into Hbase.
 * 
 * HowToUse : 
 * 	Make sure of the following :
 * 	1. Upload apache logs ( /var/log/apache2/access.log* ) to \ 
 * 		hdfs (default: /user/waue/apache-log) \
 * 	 $ bin/hadoop dfs -put /var/log/apache2/ apache-log
 * 	2. parameter "dir" in main contains the logs.
 *  3. you should filter the exception contents manually, \ 
 *  	ex:  ::1 - - [29/Jun/2008:07:35:15 +0800] "GET / HTTP/1.0" 200 729 "...
 *  
 * Check Result:
 * 	Go to hbase console, type : 
 * 		hql > select * from apache-log;

 +-------------------------+-------------------------+-------------------------+
 | Row                     | Column                  | Cell                    |
 +-------------------------+-------------------------+-------------------------+
 | 118.170.101.250         | http:agent              | Mozilla/4.0 (compatible;|
 |                         |                         |  MSIE 4.01; Windows 95) |
 +-------------------------+-------------------------+-------------------------+
 | 118.170.101.250         | http:bytesize           | 318                     |
 +-------------------------+-------------------------+-------------------------+
 ..........(skip)........
 +-------------------------+-------------------------+-------------------------+
 | 87.65.93.58             | http:method             | OPTIONS                 |
 +-------------------------+-------------------------+-------------------------+
 | 87.65.93.58             | http:protocol           | HTTP/1.1                |
 +-------------------------+-------------------------+-------------------------+
 | 87.65.93.58             | referrer:-              | *                       |
 +-------------------------+-------------------------+-------------------------+
 | 87.65.93.58             | url:*                   | -                       |
 +-------------------------+-------------------------+-------------------------+
 31 row(s) in set. (0.58 sec)



 */
package tw.org.nchc.code;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Locale;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseAdmin;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTable;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * One Snort alert record, parsed from a single ';'-separated line.
 *
 * Field order after splitting on ';' (0-based):
 *   0 gid, 1 sid, 2 version, 3 alert name, 4 class type, 5 priority,
 *   6 month, 7 day, 8 hour, 9 minute, 10 second,
 *   11 source, 12 destination, 13 type, 14 ttl, 15 tos, 16 id,
 *   17 iplen, 18 dgmlen
 *
 * The record carries no year, so the timestamp is always built with the
 * year 2008.
 *
 * NOTE(review): the month token is matched against the month names of
 * Locale.TAIWAN ("MMM" pattern) -- confirm that the upstream parser
 * emits months in that form.
 */
class Log {

	/** Minimum number of ';'-separated fields a record must contain. */
	private static final int FIELD_COUNT = 19;

	/** Alert time in milliseconds since the epoch (default time zone). */
	long timestamp;

	String gid, sid, version;

	String alert_name, class_type, priority;

	String source, destination, type, ttl, tos, id, iplen, dgmlen;

	/**
	 * Parses one record line.
	 *
	 * @param data
	 *            the ';'-separated record
	 * @throws IllegalArgumentException
	 *             if data is null or has fewer than 19 fields
	 * @throws Exception
	 *             if the date/time fields cannot be parsed
	 */
	public Log(String data) throws Exception {
		if (data == null) {
			throw new IllegalArgumentException("snort record is null");
		}

		String[] arr = data.split(";");
		// Fail with a readable message instead of a bare
		// ArrayIndexOutOfBoundsException when the line is truncated.
		if (arr.length < FIELD_COUNT) {
			throw new IllegalArgumentException("snort record has "
					+ arr.length + " field(s), expected at least "
					+ FIELD_COUNT + ": " + data);
		}

		this.gid = arr[0];
		this.sid = arr[1];
		this.version = arr[2];
		this.alert_name = arr[3];
		this.class_type = arr[4];
		this.priority = arr[5];
		// day/month/2008:hour:minute:second
		this.timestamp = getTime(arr[7] + "/" + arr[6] + "/2008:" + arr[8]
				+ ":" + arr[9] + ":" + arr[10]);
		this.source = arr[11];
		this.destination = arr[12];
		this.type = arr[13];
		this.ttl = arr[14];
		this.tos = arr[15];
		this.id = arr[16];
		this.iplen = arr[17];
		this.dgmlen = arr[18];
	}

	/**
	 * Converts a "dd/MMM/yyyy:HH:mm:ss" string to epoch milliseconds.
	 *
	 * A new SimpleDateFormat is created on every call because the class
	 * is not thread-safe.
	 *
	 * @param str
	 *            the date/time text
	 * @return milliseconds since the epoch
	 * @throws Exception
	 *             if str does not match the pattern
	 */
	long getTime(String str) throws Exception {
		SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss",
				Locale.TAIWAN);
		return sdf.parse(str).getTime();
	}
}

// import AccessLogParser
/**
 * Loads parsed Snort alert records into an HBase table with a map-only
 * MapReduce job.
 *
 * main() creates the table if it is missing and then runs the job over
 * the input path. Each map() call turns one ';'-separated input line
 * into one HBase row keyed by the record's destination field.
 */
public class SnortBase {
	static HBaseConfiguration conf = new HBaseConfiguration();

	/** Job-configuration key under which the target table name is passed. */
	public static final String TABLE = "table.name";

	/** Target table name; replaced by the job's TABLE property in configure(). */
	static String tableName = "mySnort";

	/** Lazily opened handle on the target table, reused across map() calls. */
	static HTable table = null;

	/**
	 * Mapper that writes every input record straight into HBase; it emits
	 * nothing to the output collector.
	 */
	public static class MapClass extends MapReduceBase implements
			Mapper<WritableComparable, Text, Text, Writable> {

		/** Local file that receives a debug dump of the latest record. */
		private static final String DEBUG_FILE = "/home/waue/Desktop/snort-result.txt";

		/**
		 * Picks up the table name that runMapReduce() stored in the job
		 * configuration, so the mapper writes to the table the driver
		 * created. The current tableName is kept when the property is
		 * not set.
		 */
		@Override
		public void configure(JobConf job) {
			String configured = job.get(TABLE, tableName);
			if (!configured.equals(tableName)) {
				tableName = configured;
				// Force map() to reopen the handle on the new table.
				table = null;
			}
		}

		/**
		 * Parses one record and stores it as a row whose key is the
		 * record's destination and whose cell timestamp is the alert
		 * time.
		 *
		 * Failures are printed and the record is skipped, so a single
		 * bad line does not fail the whole task.
		 *
		 * @param key
		 *            unused
		 * @param value
		 *            one ';'-separated Snort record
		 * @param output
		 *            unused
		 * @param reporter
		 *            unused
		 */
		public void map(WritableComparable key, Text value,
				OutputCollector<Text, Writable> output, Reporter reporter)
				throws IOException {

			try {
				String line = value.toString();
				Log log = new Log(line);

				// Inspect the value: best-effort only, a failure here
				// must not prevent the HBase write below.
				dumpDebugRecord(line + "_time=" + log.timestamp + "\n");

				if (table == null) {
					table = new HTable(conf, new Text(tableName));
				}

				long lockId = table.startUpdate(new Text(log.destination));
				putCell(lockId, "id:gid", log.gid);
				putCell(lockId, "id:sid", log.sid);
				putCell(lockId, "id:version", log.version);
				putCell(lockId, "name:name", log.alert_name);
				putCell(lockId, "name:class", log.class_type);
				putCell(lockId, "index:priority", log.priority);
				// NOTE(review): "soure" looks like a typo for "source";
				// kept as-is because existing data/readers may use it.
				putCell(lockId, "index:soure", log.source);
				putCell(lockId, "payload:type", log.type);
				putCell(lockId, "payload:ttl", log.ttl);
				putCell(lockId, "payload:tos", log.tos);
				putCell(lockId, "payload:id", log.id);
				putCell(lockId, "payload:iplen", log.iplen);
				putCell(lockId, "payload:dgmlen", log.dgmlen);
				table.commit(lockId, log.timestamp);

			} catch (Exception e) {
				e.printStackTrace();
			}

		}

		/** Adds one column value to the pending row update. */
		private static void putCell(long lockId, String column, String cell)
				throws IOException {
			table.put(lockId, new Text(column), cell.getBytes());
		}

		/**
		 * Overwrites DEBUG_FILE with the given text. Any I/O problem is
		 * reported on stderr and otherwise ignored.
		 */
		private static void dumpDebugRecord(String text) {
			try {
				FileWriter out = new FileWriter(new File(DEBUG_FILE));
				try {
					out.write(text);
					out.flush();
				} finally {
					out.close();
				}
			} catch (IOException e) {
				System.err.println("snort debug dump to " + DEBUG_FILE
						+ " failed: " + e);
			}
		}
	}

	/**
	 * Lists the entries directly under a path.
	 *
	 * Built on FileSystem.listStatus; the original author added it to
	 * avoid a warning from FileSystem.listPaths.
	 *
	 * @param fsm
	 *            file system to query
	 * @param path
	 *            path to list
	 * @return the path of every status entry, in listStatus order
	 */
	static public Path[] listPaths(FileSystem fsm, Path path)
			throws IOException {
		FileStatus[] fss = fsm.listStatus(path);
		int length = fss.length;
		Path[] pi = new Path[length];
		for (int i = 0; i < length; i++) {
			pi[i] = fss[i].getPath();
		}
		return pi;
	}

	/**
	 * Runs the map-only job that loads the records under inpath into the
	 * given table.
	 *
	 * The job's output directory is a throw-away temp directory; it is
	 * removed, and the file system closed, whether or not the job
	 * succeeds.
	 *
	 * @param table
	 *            target table name, handed to the mappers through the
	 *            TABLE job property
	 * @param inpath
	 *            input path of the parsed Snort records
	 */
	public static void runMapReduce(String table, String inpath)
			throws IOException {
		Path tempDir = new Path("/tmp/Mylog/");
		Path InputPath = new Path(inpath);
		FileSystem fs = FileSystem.get(conf);
		try {
			JobConf jobConf = new JobConf(conf, SnortBase.class);
			jobConf.setJobName("Snort Parse");
			jobConf.set(TABLE, table);
			// The automatic directory search feature is omitted for now.
			/*
			 * Path[] in = listPaths(fs, InputDir); if (fs.isFile(InputDir)) {
			 * jobConf.setInputPath(InputDir); } else { for (int i = 0; i <
			 * in.length; i++) { if (fs.isFile(in[i])) {
			 * jobConf.addInputPath(in[i]); } else { Path[] sub = listPaths(fs,
			 * in[i]); for (int j = 0; j < sub.length; j++) { if (fs.isFile(sub[j])) {
			 * jobConf.addInputPath(sub[j]); } } } } }
			 */
			jobConf.setInputPath(InputPath);
			jobConf.setOutputPath(tempDir);
			jobConf.setMapperClass(MapClass.class);
			JobClient client = new JobClient(jobConf);
			ClusterStatus cluster = client.getClusterStatus();
			jobConf.setNumMapTasks(cluster.getMapTasks());
			jobConf.setNumReduceTasks(0);
			try {
				JobClient.runJob(jobConf);
			} finally {
				// Also clean up after a failed run.
				fs.delete(tempDir);
			}
		} finally {
			fs.close();
		}
	}

	/**
	 * Creates the table with the families id:, name:, index:, payload:
	 * and priority: unless it already exists.
	 *
	 * NOTE(review): the mapper stores priority under "index:priority";
	 * nothing in this file writes to the "priority:" family.
	 *
	 * @param table
	 *            name of the table to create
	 */
	public static void creatTable(String table) throws IOException {
		HBaseAdmin admin = new HBaseAdmin(conf);
		if (!admin.tableExists(new Text(table))) {
			System.out.println("1. " + table
					+ " table creating ... please wait");
			HTableDescriptor tableDesc = new HTableDescriptor(table);
			tableDesc.addFamily(new HColumnDescriptor("id:"));
			tableDesc.addFamily(new HColumnDescriptor("name:"));
			tableDesc.addFamily(new HColumnDescriptor("index:"));
			tableDesc.addFamily(new HColumnDescriptor("payload:"));
			tableDesc.addFamily(new HColumnDescriptor("priority:"));
			admin.createTable(tableDesc);
		} else {
			System.out.println("1. " + table + " table already exists.");
		}
		System.out.println("2. access_log files fetching using map/reduce");
	}

	/**
	 * Creates the "snort" table and loads /user/waue/alert_meta into it.
	 * Command-line arguments are ignored.
	 */
	public static void main(String[] args) throws IOException, Exception {
		String table_name = "snort";
		String path = "/user/waue/alert_meta";

		// The automatic upload after parsing is omitted for now.
		/*
		 * SnortParser sp = new
		 * SnortParser("/tmp/alert","/tmp/alert_SnortBase"); sp.parseToLine();
		 */
		creatTable(table_name);

		runMapReduce(table_name, path);

	}

}
