Commit 19b01b2c authored by dangq

StringtoIntMap.java

parent 584b40fb
This diff is collapsed.
......@@ -94,7 +94,7 @@ public class Pair extends Configured implements Tool {
// TODO: set job input format
Configuration conf = this.getConf();
Job job = new Job(conf,"Word Count");
Job job = new Job(conf,"PAIR");
job.setInputFormatClass(TextInputFormat.class);
......
package fr.eurecom.dsg.mapreduce;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeSet;
import org.apache.hadoop.io.Writable;
/**
 * Very simple (and scholastic) implementation of a Hadoop {@link Writable}
 * associative array from String to int.
 *
 * <p>Wire format produced by {@link #write(DataOutput)} and consumed by
 * {@link #readFields(DataInput)}: a 4-byte entry count, followed by that many
 * (modified-UTF-8 key, 4-byte int value) pairs. The record is self-delimiting,
 * so it can be decoded correctly even when other data follows it in the stream,
 * and keys may contain spaces, dashes or non-ASCII characters.
 */
public class StringToIntMapWritable implements Writable {

    /** The real associative array backing this writable. */
    public final Map<String, Integer> counts;

    /**
     * Wraps the given map. The map is not copied: later changes to it are
     * visible through this writable, and {@link #readFields(DataInput)}
     * clears and refills it.
     *
     * @param counts the map to wrap
     */
    public StringToIntMapWritable(Map<String, Integer> counts) {
        this.counts = counts;
    }

    /** Creates an empty writable backed by a fresh {@link HashMap}. */
    public StringToIntMapWritable() {
        this(new HashMap<String, Integer>());
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        return prime + ((counts == null) ? 0 : counts.hashCode());
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        StringToIntMapWritable other = (StringToIntMapWritable) obj;
        if (counts == null) {
            return other.counts == null;
        }
        return counts.equals(other.counts);
    }

    /**
     * Replaces the content of this map with the record read from {@code in}.
     *
     * <p>Warning: for efficiency reasons, Hadoop attempts to re-use old
     * instances of StringToIntMapWritable when reading new records, so the map
     * is cleared first in order to get rid of old data.
     *
     * @param in the stream to read one serialized map from
     * @throws IOException if the stream fails, ends early, or carries a
     *         negative entry count
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        counts.clear();
        int size = in.readInt();
        if (size < 0) {
            throw new IOException("Corrupt StringToIntMapWritable record: negative entry count " + size);
        }
        for (int i = 0; i < size; i++) {
            String word = in.readUTF();
            int count = in.readInt();
            counts.put(word, count);
        }
    }

    /**
     * Serializes this map as an entry count followed by the (key, value) pairs.
     *
     * <p>Keys are written with {@link DataOutput#writeUTF(String)}, which
     * limits each encoded key to 65535 bytes.
     *
     * @param out the stream to write to
     * @throws IOException if the stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(counts.size());
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            out.writeUTF(entry.getKey());
            out.writeInt(entry.getValue());
        }
    }

    /**
     * Renders the map as {@code key-count } items (each followed by a single
     * space), sorted by key so the text output is deterministic.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (String key : new TreeSet<String>(counts.keySet())) {
            sb.append(key).append('-').append(counts.get(key)).append(' ');
        }
        return sb.toString();
    }
}
......@@ -5,9 +5,16 @@ import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
......@@ -22,16 +29,28 @@ public class Stripes extends Configured implements Tool {
/**
 * Configures and runs the STRIPES job, blocking until it completes.
 *
 * <p>The input path, output directory and number of reducers are read from
 * the {@code inputPath}, {@code outputDir} and {@code numReducers} fields of
 * the enclosing class (declared outside this method; presumably set from the
 * command line before this method is called).
 *
 * @param args command-line arguments; not read by this method
 * @return 0 if the job succeeded, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Job job = new Job(conf, "STRIPES");

    // Input: plain text, one record per line.
    job.setInputFormatClass(TextInputFormat.class);

    // Map stage: the registered output classes must match StripesMapper's
    // declared <TextPair, IntWritable> output types, otherwise Hadoop fails
    // the task with a key type mismatch.
    job.setMapperClass(StripesMapper.class);
    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Reduce stage: same requirement for StripesReducer's declared
    // <TextPair, IntWritable> output types.
    job.setReducerClass(StripesReducer.class);
    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(IntWritable.class);

    // Output: plain text.
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, this.inputPath);
    FileOutputFormat.setOutputPath(job, this.outputDir);
    job.setNumReduceTasks(this.numReducers);

    // Locate the job jar through this driver class rather than an unrelated one.
    job.setJarByClass(Stripes.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
......@@ -53,29 +72,41 @@ public class Stripes extends Configured implements Tool {
}
class StripesMapper
extends Mapper<Object, // TODO: change Object to input key type
Object, // TODO: change Object to input value type
Object, // TODO: change Object to output key type
Object> { // TODO: change Object to output value type
extends Mapper<LongWritable, // TODO: change Object to input key type
Text, // TODO: change Object to input value type
TextPair, // TODO: change Object to output key type
IntWritable> { // TODO: change Object to output value type
@Override
public void map(Object key, // TODO: change Object to input key type
Object value, // TODO: change Object to input value type
public void map(LongWritable key, // TODO: change Object to input key type
Text value, // TODO: change Object to input value type
Context context)
throws java.io.IOException, InterruptedException {
// TODO: implement map method
String line = this.toString();
line = line.replaceAll("[^a-zA-Z0-9_]+", " ");
line = line.replaceAll("^\\s+", "");
String[] words = line.split("\\s+");
for (int i = 0; i < words.length - 1; i++) {
String first = words[i];
for (int j = 0; j < words.length - 1; j++) {
if (i != j) {
String second = words[i + 1];
context.write(new TextPair(first, second), new IntWritable(1));
}
}
}
// TODO: implement map method
}
}
class StripesReducer
extends Reducer<Object, // TODO: change Object to input key type
Object, // TODO: change Object to input value type
Object, // TODO: change Object to output key type
Object> { // TODO: change Object to output value type
extends Reducer<TextPair, // TODO: change Object to input key type
IntWritable, // TODO: change Object to input value type
TextPair, // TODO: change Object to output key type
IntWritable> { // TODO: change Object to output value type
@Override
public void reduce(Object key, // TODO: change Object to input key type
Iterable<Object> values, // TODO: change Object to input value type
public void reduce(TextPair key, // TODO: change Object to input key type
Iterable<IntWritable> values, // TODO: change Object to input value type
Context context) throws IOException, InterruptedException {
// TODO: implement the reduce method
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment