Move MRDP folder to root

adamjshook · Dec 7, 2012 · 6a0a877 · 6a0a877
1 parent ad85347
commit 6a0a877
Show file tree

Hide file tree

Showing 44 changed files with 6,744 additions and 6,744 deletions.
diff --git a/trunk/MRDP/src/main/java/mrdp/MRDPMain.java → MRDP/src/main/java/mrdp/MRDPMain.java b/trunk/MRDP/src/main/java/mrdp/MRDPMain.java → MRDP/src/main/java/mrdp/MRDPMain.java
@@ -1,65 +1,65 @@
-package mrdp;
-
-import java.util.Arrays;
-
-import mrdp.ch1.*;
-import mrdp.ch2.*;
-import mrdp.ch3.*;
-import mrdp.ch4.*;
-import mrdp.ch5.*;
-import mrdp.ch6.*;
-import mrdp.ch7.*;
-import mrdp.utils.MRDPUtils;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-@SuppressWarnings("unused")
-public class MRDPMain extends Configured implements Tool {
-
-	public static void main(String[] args) throws Exception {
-		System.exit(ToolRunner.run(new Configuration(), new MRDPMain(), args));
-	}
-
-	@Override
-	public int run(String[] args) throws Exception {
-		if (args.length > 0) {
-			String example = args[0];
-			String[] otherArgs = Arrays.copyOfRange(args, 1, args.length);
-
-			if (example.equalsIgnoreCase("PartitionPruningOutput")) {
-				PartitionPruningOutputDriver.main(otherArgs);
-			} else if (example.equalsIgnoreCase("PartitionPruningInput")) {
-				PartitionPruningInputDriver.main(otherArgs);
-			} else if (example.equalsIgnoreCase("RedisInput")) {
-				RedisInputDriver.main(otherArgs);
-			} else if (example.equalsIgnoreCase("RedisOutput")) {
-				RedisOutputDriver.main(otherArgs);
-			} else {
-				printHelp();
-				return 1;
-			}
-
-			return 0;
-		} else {
-			printHelp();
-			return 1;
-		}
-	}
-
-	private void printHelp() {
-		System.out
-				.println("Usage: hadoop jar mrdp.jar <example> <example args>");
-		System.out.println("Examples are:");
-		System.out.println("Chapter 7:");
-		System.out
-				.println("\tRedisOutput  <user data> <redis hosts> <hashset name>");
-		System.out
-				.println("\tRedisInput <redis hosts> <hashset name> <output>");
-		System.out.println("\tPartitionPruningOutput <user data>");
-		System.out
-				.println("\tPartitionPruningInput <last access months> <output>");
-	}
-}
+package mrdp;
+
+import java.util.Arrays;
+
+import mrdp.ch1.*;
+import mrdp.ch2.*;
+import mrdp.ch3.*;
+import mrdp.ch4.*;
+import mrdp.ch5.*;
+import mrdp.ch6.*;
+import mrdp.ch7.*;
+import mrdp.utils.MRDPUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+@SuppressWarnings("unused")
+public class MRDPMain extends Configured implements Tool {
+
+	public static void main(String[] args) throws Exception {
+		System.exit(ToolRunner.run(new Configuration(), new MRDPMain(), args));
+	}
+
+	@Override
+	public int run(String[] args) throws Exception {
+		if (args.length > 0) {
+			String example = args[0];
+			String[] otherArgs = Arrays.copyOfRange(args, 1, args.length);
+
+			if (example.equalsIgnoreCase("PartitionPruningOutput")) {
+				PartitionPruningOutputDriver.main(otherArgs);
+			} else if (example.equalsIgnoreCase("PartitionPruningInput")) {
+				PartitionPruningInputDriver.main(otherArgs);
+			} else if (example.equalsIgnoreCase("RedisInput")) {
+				RedisInputDriver.main(otherArgs);
+			} else if (example.equalsIgnoreCase("RedisOutput")) {
+				RedisOutputDriver.main(otherArgs);
+			} else {
+				printHelp();
+				return 1;
+			}
+
+			return 0;
+		} else {
+			printHelp();
+			return 1;
+		}
+	}
+
+	private void printHelp() {
+		System.out
+				.println("Usage: hadoop jar mrdp.jar <example> <example args>");
+		System.out.println("Examples are:");
+		System.out.println("Chapter 7:");
+		System.out
+				.println("\tRedisOutput  <user data> <redis hosts> <hashset name>");
+		System.out
+				.println("\tRedisInput <redis hosts> <hashset name> <output>");
+		System.out.println("\tPartitionPruningOutput <user data>");
+		System.out
+				.println("\tPartitionPruningInput <last access months> <output>");
+	}
+}
diff --git a/...ava/mrdp/appendixA/BloomFilterDriver.java → ...ava/mrdp/appendixA/BloomFilterDriver.java b/...ava/mrdp/appendixA/BloomFilterDriver.java → ...ava/mrdp/appendixA/BloomFilterDriver.java
@@ -1,97 +1,97 @@
-package mrdp.appendixA;
-
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
-import java.util.zip.GZIPInputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.hadoop.util.bloom.BloomFilter;
-import org.apache.hadoop.util.bloom.Key;
-import org.apache.hadoop.util.hash.Hash;
-
-public class BloomFilterDriver {
-
-	public static void main(String[] args) throws Exception {
-		Configuration conf = new Configuration();
-		String[] otherArgs = new GenericOptionsParser(conf, args)
-				.getRemainingArgs();
-		if (otherArgs.length != 4) {
-			System.err
-					.println("Usage: BloomFilterWriter <inputfile> <nummembers> <falseposrate> <bfoutfile>");
-			System.exit(1);
-		}
-
-		FileSystem fs = FileSystem.get(new Configuration());
-
-		// Parse command line arguments
-		Path inputFile = new Path(otherArgs[0]);
-		int numMembers = Integer.parseInt(otherArgs[1]);
-		float falsePosRate = Float.parseFloat(otherArgs[2]);
-		Path bfFile = new Path(otherArgs[3]);
-
-		// Calculate our vector size and optimal K value based on approximations
-		int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
-		int nbHash = getOptimalK(numMembers, vectorSize);
-
-		// create new Bloom filter
-		BloomFilter filter = new BloomFilter(vectorSize, nbHash,
-				Hash.MURMUR_HASH);
-
-		// Open file for read
-
-		System.out.println("Training Bloom filter of size " + vectorSize
-				+ " with " + nbHash + " hash functions, " + numMembers
-				+ " approximate number of records, and " + falsePosRate
-				+ " false positive rate");
-
-		String line = null;
-		int numRecords = 0;
-		for (FileStatus status : fs.listStatus(inputFile)) {
-			BufferedReader rdr;
-			// if file is gzipped, wrap it in a GZIPInputStream
-			if (status.getPath().getName().endsWith(".gz")) {
-				rdr = new BufferedReader(new InputStreamReader(
-						new GZIPInputStream(fs.open(status.getPath()))));
-			} else {
-				rdr = new BufferedReader(new InputStreamReader(fs.open(status
-						.getPath())));
-			}
-
-			System.out.println("Reading " + status.getPath());
-			while ((line = rdr.readLine()) != null) {
-				filter.add(new Key(line.getBytes()));
-				++numRecords;
-			}
-
-			rdr.close();
-		}
-
-		System.out.println("Trained Bloom filter with " + numRecords
-				+ " entries.");
-
-		System.out.println("Serializing Bloom filter to HDFS at " + bfFile);
-		FSDataOutputStream strm = fs.create(bfFile);
-		filter.write(strm);
-
-		strm.flush();
-		strm.close();
-
-		System.out.println("Done training Bloom filter.");
-	}
-
-	public static int getOptimalBloomFilterSize(int numRecords,
-			float falsePosRate) {
-		int size = (int) (-numRecords * (float) Math.log(falsePosRate) / Math
-				.pow(Math.log(2), 2));
-		return size;
-	}
-
-	public static int getOptimalK(float numMembers, float vectorSize) {
-		return (int) Math.round(vectorSize / numMembers * Math.log(2));
-	}
-}
+package mrdp.appendixA;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.bloom.BloomFilter;
+import org.apache.hadoop.util.bloom.Key;
+import org.apache.hadoop.util.hash.Hash;
+
+public class BloomFilterDriver {
+
+	public static void main(String[] args) throws Exception {
+		Configuration conf = new Configuration();
+		String[] otherArgs = new GenericOptionsParser(conf, args)
+				.getRemainingArgs();
+		if (otherArgs.length != 4) {
+			System.err
+					.println("Usage: BloomFilterWriter <inputfile> <nummembers> <falseposrate> <bfoutfile>");
+			System.exit(1);
+		}
+
+		FileSystem fs = FileSystem.get(new Configuration());
+
+		// Parse command line arguments
+		Path inputFile = new Path(otherArgs[0]);
+		int numMembers = Integer.parseInt(otherArgs[1]);
+		float falsePosRate = Float.parseFloat(otherArgs[2]);
+		Path bfFile = new Path(otherArgs[3]);
+
+		// Calculate our vector size and optimal K value based on approximations
+		int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
+		int nbHash = getOptimalK(numMembers, vectorSize);
+
+		// create new Bloom filter
+		BloomFilter filter = new BloomFilter(vectorSize, nbHash,
+				Hash.MURMUR_HASH);
+
+		// Open file for read
+
+		System.out.println("Training Bloom filter of size " + vectorSize
+				+ " with " + nbHash + " hash functions, " + numMembers
+				+ " approximate number of records, and " + falsePosRate
+				+ " false positive rate");
+
+		String line = null;
+		int numRecords = 0;
+		for (FileStatus status : fs.listStatus(inputFile)) {
+			BufferedReader rdr;
+			// if file is gzipped, wrap it in a GZIPInputStream
+			if (status.getPath().getName().endsWith(".gz")) {
+				rdr = new BufferedReader(new InputStreamReader(
+						new GZIPInputStream(fs.open(status.getPath()))));
+			} else {
+				rdr = new BufferedReader(new InputStreamReader(fs.open(status
+						.getPath())));
+			}
+
+			System.out.println("Reading " + status.getPath());
+			while ((line = rdr.readLine()) != null) {
+				filter.add(new Key(line.getBytes()));
+				++numRecords;
+			}
+
+			rdr.close();
+		}
+
+		System.out.println("Trained Bloom filter with " + numRecords
+				+ " entries.");
+
+		System.out.println("Serializing Bloom filter to HDFS at " + bfFile);
+		FSDataOutputStream strm = fs.create(bfFile);
+		filter.write(strm);
+
+		strm.flush();
+		strm.close();
+
+		System.out.println("Done training Bloom filter.");
+	}
+
+	public static int getOptimalBloomFilterSize(int numRecords,
+			float falsePosRate) {
+		int size = (int) (-numRecords * (float) Math.log(falsePosRate) / Math
+				.pow(Math.log(2), 2));
+		return size;
+	}
+
+	public static int getOptimalK(float numMembers, float vectorSize) {
+		return (int) Math.round(vectorSize / numMembers * Math.log(2));
+	}
+}