Commit 8bf68212 authored by leroyq

hi

parent df2ebe10
@@ -17,27 +17,31 @@ import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
public class OrderInversion extends Configured implements Tool {
private final static String ASTERISK = "\0";
public static class PartitionerTextPair extends
Partitioner<TextPair, IntWritable> {
public static class PartitionerTextPair extends Partitioner<TextPair, IntWritable> {
@Override
public int getPartition(TextPair key, IntWritable value,
int numPartitions) {
// TODO: implement getPartition such that pairs with the same first element will go to the same reducer. You can use toUnsighed as utility.
return 0;
// TODO: implement getPartition such that pairs with the same first element
// will go to the same reducer. You can use toUnsigned as utility.
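// hashCode() may be negative, so the sign bit is masked off before the
// modulo to keep the partition index in the valid [0, numPartitions) range.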
return toUnsigned(key.first.toString().hashCode()) % numPartitions;
}
/**
* toUnsigned(10) = 10
* toUnsigned(-1) = 2147483647
* toUnsigned(10) = 10 toUnsigned(-1) = 2147483647
*
* @param val Value to convert
* @param val
* Value to convert
* @return the unsigned number with the same bits of val
*/
* */
public static int toUnsigned(int val) {
return val & Integer.MAX_VALUE;
}
@@ -50,16 +54,64 @@ public class OrderInversion extends Configured implements Tool {
public void map(LongWritable key, Text value, Context context)
throws java.io.IOException, InterruptedException {
// TODO: implement the map method
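// Split the line on non-letter characters and collect the lower-cased, non-empty tokens.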
int count;
String line[] = value.toString().split("[^a-zA-Z]+");
ArrayList<String> array = new ArrayList<String>();
for (String word : line) {
if (word.trim().length() == 0)
continue;
word = word.toLowerCase();
array.add(word.trim());
}
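// For every ordered pair of distinct words emit (<k1, k2>, 1); afterwards emit
// the special (<k1, ASTERISK>, count) marginal that the reducer uses as the
// denominator (order inversion pattern).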
for (String k1 : array) {
count = 0; // Number of pairs emitted for k1 in this line, e.g. [<w1, w2>, <w1, w3>] = 2
for (String k2 : array) {
if (k1.compareTo(k2) != 0) {
count++;
TextPair p = new TextPair();
p.set(new Text(k1), new Text(k2));
context.write(p, new IntWritable(1));
}
}
TextPair p = new TextPair();
p.set(new Text(k1), new Text(ASTERISK));
context.write(p, new IntWritable(count));
}
}
}
public static class PairReducer extends
Reducer<TextPair, IntWritable, TextPair, DoubleWritable> {
// TODO: implement the reduce method
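// Marginal count of the current left word, filled in when the (word, ASTERISK)
// pair is reduced and reused for all following (word, other) pairs.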
double wordCount;
@Override
protected void reduce(TextPair key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
Iterator<IntWritable> it = values.iterator();
double count = 0;
while (it.hasNext()) {
count += it.next().get();
}
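// ASTERISK ("\0") is chosen so that the (word, ASTERISK) marginal sorts before
// any real pair for that word, so wordCount already holds the denominator here.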
if (key.second.toString().compareTo(ASTERISK) == 0) {
wordCount = count;
} else {
TextPair p = new TextPair();
p.set(new Text(key.first), new Text(key.second));
context.write(p, new DoubleWritable(count/wordCount));
}
// TextPair p = new TextPair();
// p.set(new Text(key.key1), new Text(key.key2));
// context.write(p, new DoubleWritable(count));
}
}
private int numReducers;
private Path inputPath;
private Path outputDir;
@@ -67,30 +119,33 @@ public class OrderInversion extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = null; // TODO: define new job instead of null using conf
job = Job.getInstance(conf); //Done
Job job = new Job(conf); // TODO: define new job instead of null using conf and setting a name
// TODO: set job input format
job.setInputFormatClass(TextInputFormat.class);//Done
job.setInputFormatClass(TextInputFormat.class);
// TODO: set map class and the map output key and value classes
job.setMapperClass(PairMapper.class);//Done
job.setMapOutputKeyClass(Text.class);//Done
job.setMapOutputValueClass(LongWritable.class);//Done
job.setMapperClass(PairMapper.class);
job.setMapOutputKeyClass(TextPair.class);
job.setMapOutputValueClass(IntWritable.class);
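// Map output is (TextPair, IntWritable); the reducer switches the value type
// to DoubleWritable to hold the relative frequency.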
// TODO: set reduce class and the reduce output key and value classes
job.setReducerClass(PairReducer.class);//Done
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setReducerClass(PairReducer.class);
job.setOutputKeyClass(TextPair.class);
job.setOutputValueClass(DoubleWritable.class);
// TODO: set job output format
job.setOutputFormatClass(TextOutputFormat.class);//Done
job.setOutputFormatClass(TextOutputFormat.class);
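// Custom partitioner: all pairs sharing the same left word (including its
// ASTERISK marginal) must reach the same reducer.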
job.setPartitionerClass(PartitionerTextPair.class);
// TODO: add the input file as job input (from HDFS) to the variable
TextInputFormat.addInputPath(job, new Path(args[1]));//Done
// inputPath
// TODO: set the output path for the job results (to HDFS) to the variable
TextOutputFormat.setOutputPath(job, new Path(args[2]));//Done
// outputPath
// inputFile
TextInputFormat.setInputPaths(job, this.inputPath);
// TODO: set the output path for the job results (to HDFS) to the
// variable
// outputPath
TextOutputFormat.setOutputPath(job, this.outputDir);
// TODO: set the number of reducers using variable numberReducers
job.setNumReduceTasks(this.numReducers);//Done
job.setNumReduceTasks(this.numReducers);
// TODO: set the jar class
job.setJarByClass(OrderInversion.class);
job.setJarByClass(Pair.class);
return job.waitForCompletion(true) ? 0 : 1;
}
-------------------------------------------------------------------------------
Test set: fr.eurecom.dsg.mapreduce.WordCountTest
-------------------------------------------------------------------------------
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.027 sec
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.028 sec