Commit c5b64585 authored by TRUONG Quang-Huy

WordCountIMC

parent 935cb767
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="" />
+    <mapping directory="" vcs="Git" />
  </component>
</project>
\ No newline at end of file
package fr.eurecom.dsg.mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Word Count example of a MapReduce job. Given a plain-text input, this job
 * counts the occurrences of each word in the text and writes the result to
 * HDFS. This variant uses a combiner to pre-aggregate map output before the
 * shuffle.
 */
public class WordCountCombiner extends Configured implements Tool {

  private int numReducers;
  private Path inputPath;
  private Path outputDir;

  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Define the new job using conf and set a name
    Job job = new Job(conf, "Word Count");
    // Set the job input format
    job.setInputFormatClass(TextInputFormat.class);
    // Set the map class and the map output key and value classes;
    // these must match what WCMapperCombiner actually emits (Text, LongWritable)
    job.setMapperClass(WCMapperCombiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // Set the combiner class; the reducer is reused because its input and
    // output types both match the map output types
    job.setCombinerClass(WCReducerCombiner.class);
    // Set the reduce class and the reduce output key and value classes
    job.setReducerClass(WCReducerCombiner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Set the job output format
    job.setOutputFormatClass(TextOutputFormat.class);
    // Add the input file as job input (from HDFS)
    FileInputFormat.addInputPath(job, this.inputPath);
    // Set the output path for the job results (to HDFS)
    FileOutputFormat.setOutputPath(job, this.outputDir);
    // Set the number of reducers (optional; the default is 1)
    job.setNumReduceTasks(this.numReducers);
    // Set the jar class
    job.setJarByClass(WordCountCombiner.class);
    return job.waitForCompletion(true) ? 0 : 1; // this will execute the job
  }
  public WordCountCombiner(String[] args) {
    if (args.length != 3) {
      System.out.println("Usage: WordCountCombiner <num_reducers> <input_path> <output_path>");
      System.exit(1);
    }
    this.numReducers = Integer.parseInt(args[0]);
    this.inputPath = new Path(args[1]);
    this.outputDir = new Path(args[2]);
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new WordCountCombiner(args), args);
    System.exit(res);
  }
}
class WCMapperCombiner extends Mapper<LongWritable, Text, Text, LongWritable> {

  private Text word = new Text();
  private final static LongWritable ONE = new LongWritable(1);

  @Override
  protected void map(LongWritable offset, Text text, Context context)
      throws IOException, InterruptedException {
    // Emit (word, 1) for every token in the input line
    StringTokenizer iter = new StringTokenizer(text.toString());
    while (iter.hasMoreTokens()) {
      this.word.set(iter.nextToken());
      context.write(this.word, ONE);
    }
  }
}
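// Note: this reducer class is also registered as the combiner above. Reusing
// the reducer as a combiner is safe here only because summation is
// associative and commutative, and because the reducer's input and output
// types are identical (Text, LongWritable), as the combiner contract requires.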
class WCReducerCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {

  @Override
  protected void reduce(Text word, Iterable<LongWritable> values, Context context)
      throws IOException, InterruptedException {
    long accumulator = 0;
    for (LongWritable value : values) {
      accumulator += value.get();
    }
    context.write(word, new LongWritable(accumulator));
  }
}
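A quick usage sketch for the driver above. The reducer count and HDFS paths are hypothetical examples, not values from the repository; on a cluster the same arguments would be passed on the command line via hadoop jar.

// Hypothetical invocation of the combiner job through ToolRunner,
// mirroring what main() does with command-line arguments.
String[] jobArgs = {"2", "/user/hrua/input/sample.txt", "/user/hrua/output/wc-combiner"};
int exitCode = ToolRunner.run(new Configuration(), new WordCountCombiner(jobArgs), jobArgs);
System.exit(exitCode);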
package fr.eurecom.dsg.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Word Count example of a MapReduce job. Given a plain-text input, this job
 * counts the occurrences of each word in the text and writes the result to
 * HDFS. This variant uses an in-memory combiner: each mapper aggregates
 * counts in a private hash map and emits them only at cleanup time.
 */
public class WordCountIMC extends Configured implements Tool {

  private int numReducers;
  private Path inputPath;
  private Path outputDir;

  @Override
  public int run(String[] args) throws Exception {
    // Define the new job using conf and set a name
    Configuration conf = this.getConf();
    Job job = new Job(conf, "WordCountIMC");
    // Set the job input format
    job.setInputFormatClass(TextInputFormat.class);
    // Set the map class and the map output key and value classes
    job.setMapperClass(WCIMCMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Set the reduce class and the reduce output key and value classes
    job.setReducerClass(WCIMCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Set the job output format; this must be a concrete class such as
    // TextOutputFormat, not the abstract OutputFormat
    job.setOutputFormatClass(TextOutputFormat.class);
    // Add the input file as job input (from HDFS)
    FileInputFormat.addInputPath(job, this.inputPath);
    // Set the output path for the job results (to HDFS)
    FileOutputFormat.setOutputPath(job, this.outputDir);
    // Set the number of reducers (optional; the default is 1)
    job.setNumReduceTasks(this.numReducers);
    // Set the jar class
    job.setJarByClass(WordCountIMC.class);
    return job.waitForCompletion(true) ? 0 : 1; // this will execute the job
  }
  public WordCountIMC(String[] args) {
    if (args.length != 3) {
      System.out.println("Usage: WordCountIMC <num_reducers> <input_path> <output_path>");
      System.exit(1);
    }
    this.numReducers = Integer.parseInt(args[0]);
    this.inputPath = new Path(args[1]);
    this.outputDir = new Path(args[2]);
  }

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new WordCountIMC(args), args);
    System.exit(res);
  }
}
class WCIMCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

  private Map<String, IntWritable> H;
  private Text textValue = new Text();

  @Override
  protected void setup(Context context) {
    this.H = new HashMap<String, IntWritable>();
  }

  @Override
  protected void map(LongWritable key, Text text, Context context)
      throws IOException, InterruptedException {
    // In-memory combiner: instead of emitting (word, 1) for every token,
    // accumulate per-word counts in the mapper's private hash map
    StringTokenizer iter = new StringTokenizer(text.toString());
    while (iter.hasMoreTokens()) {
      String word = iter.nextToken();
      IntWritable count = H.get(word);
      if (count == null) {
        H.put(word, new IntWritable(1));
      } else {
        count.set(count.get() + 1);
      }
    }
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    // Emit the locally aggregated counts once, at the end of the map task
    for (Map.Entry<String, IntWritable> pair : this.H.entrySet()) {
      textValue.set(pair.getKey());
      context.write(textValue, pair.getValue());
    }
  }
}
class WCIMCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

  private IntWritable writableSum = new IntWritable();

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // Sum the partial counts emitted by the in-memory combiners
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    writableSum.set(sum);
    context.write(key, writableSum);
  }
}
\ No newline at end of file
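One caveat with the in-memory combiner above: the hash map grows with the number of distinct words in the mapper's input split, so a very large vocabulary can exhaust the heap. A common refinement is to flush the map whenever it gets too large. Below is a minimal sketch of the two members one could add to WCIMCMapper; FLUSH_THRESHOLD is a hypothetical tuning constant, not part of this repository.

// Sketch of a bounded in-memory combiner for WCIMCMapper.
// FLUSH_THRESHOLD is a hypothetical tuning constant, not from the original code.
private static final int FLUSH_THRESHOLD = 100000;

// Emit and clear the current partial counts.
private void flush(Context context) throws IOException, InterruptedException {
  for (Map.Entry<String, IntWritable> pair : H.entrySet()) {
    textValue.set(pair.getKey());
    context.write(textValue, pair.getValue());
  }
  H.clear();
}

// In map(), after incrementing a word's count:
//   if (H.size() >= FLUSH_THRESHOLD) flush(context);
// and cleanup() reduces to a single final call to flush(context).

This trades some aggregation quality for bounded memory: a word flushed early may be emitted more than once per mapper, which is harmless because the reducer sums partial counts anyway.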
Manifest-Version: 1.0
Build-Jdk: 1.7.0_75
Built-By: hrua
Created-By: IntelliJ IDEA