Commit 1701bf9b authored by hello's avatar hello

hello

parents
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-hdfs:2.5.0-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:11.0.2" level="project" />
<orderEntry type="library" name="Maven: org.mortbay.jetty:jetty:6.1.26.cloudera.4" level="project" />
<orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26.cloudera.4" level="project" />
<orderEntry type="library" name="Maven: com.sun.jersey:jersey-core:1.9" level="project" />
<orderEntry type="library" name="Maven: com.sun.jersey:jersey-server:1.9" level="project" />
<orderEntry type="library" name="Maven: asm:asm:3.1" level="project" />
<orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.4" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
<orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" />
<orderEntry type="library" name="Maven: commons-daemon:commons-daemon:1.0.13" level="project" />
<orderEntry type="library" name="Maven: javax.servlet.jsp:jsp-api:2.1" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.5.0" level="project" />
<orderEntry type="library" name="Maven: javax.servlet:servlet-api:2.5" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.8.8" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.8.8" level="project" />
<orderEntry type="library" name="Maven: tomcat:jasper-runtime:5.5.23" level="project" />
<orderEntry type="library" name="Maven: xmlenc:xmlenc:0.52" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty:3.6.2.Final" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-auth:2.5.0-cdh5.3.0" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.5" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.2.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.2.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.api:api-util:1.0.0-M20" level="project" />
<orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.5-cdh5.3.0" level="project" />
<orderEntry type="library" name="Maven: jline:jline:0.9.94" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.6.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-common:2.5.0-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.5.0-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
<orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" />
<orderEntry type="library" name="Maven: commons-net:commons-net:3.1" level="project" />
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
<orderEntry type="library" name="Maven: com.sun.jersey:jersey-json:1.9" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jettison:jettison:1.1" level="project" />
<orderEntry type="library" name="Maven: com.sun.xml.bind:jaxb-impl:2.2.3-1" level="project" />
<orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.2" level="project" />
<orderEntry type="library" name="Maven: javax.xml.stream:stax-api:1.0-2" level="project" />
<orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.8.3" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-xc:1.8.3" level="project" />
<orderEntry type="library" name="Maven: tomcat:jasper-compiler:5.5.23" level="project" />
<orderEntry type="library" name="Maven: commons-el:commons-el:1.0" level="project" />
<orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.9.0" level="project" />
<orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:0.4" level="project" />
<orderEntry type="library" name="Maven: commons-configuration:commons-configuration:1.6" level="project" />
<orderEntry type="library" name="Maven: commons-digester:commons-digester:1.8" level="project" />
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.7.0" level="project" />
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils-core:1.8.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.6-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: com.thoughtworks.paranamer:paranamer:2.3" level="project" />
<orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.5" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.4" level="project" />
<orderEntry type="library" name="Maven: com.jcraft:jsch:0.1.42" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.6.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.6.0" level="project" />
<orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:1.3.9" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" />
<orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-core:2.5.0-mr1-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: hsqldb:hsqldb:1.8.0.10" level="project" />
<orderEntry type="library" name="Maven: org.eclipse.jdt:core:3.1.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.10" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
</component>
</module>
\ No newline at end of file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>fr.eurecom.dsg.mapreduce</groupId>
<artifactId>WordCount</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>WordCount</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.7</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.5.0-cdh5.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.5.0-cdh5.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.5.0-cdh5.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>2.5.0-mr1-cdh5.3.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.5.0-cdh5.3.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit-dep</artifactId>
<version>4.8.2</version>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.1</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
package fr.eurecom.dsg.mapreduce;
/**
* Hello world!
*
*/
public class App
{
public static void main( String[] args )
{
System.out.println( "Hello World!" );
}
}
/**
* Created by charlesdeschard on 22/03/15.
*/
package fr.eurecom.dsg.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool {
static class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private IntWritable ONE = new IntWritable(1);
private Text textValue = new Text();
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
String[] words = line.split("\\s+");
for(String word : words) {
textValue.set(word);
context.write(textValue, ONE);}
}
}
static class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable writableSum = new IntWritable();
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable value : values)
sum += value.get();
writableSum.set(sum);
context.write(key,writableSum);
}
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = new Job(conf,"Word Count");
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(WCMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setReducerClass(WCReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[1]));
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.setNumReduceTasks(Integer.parseInt(args[0]));
job.setJarByClass(WordCount.class);
job.waitForCompletion(true);
return 0;
}
public static void main(String args[]) throws Exception {
ToolRunner.run(new Configuration(), new WordCount(), args);
}
}
\ No newline at end of file
package fr.eurecom.dsg.mapreduce;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCountCombiner extends Configured implements Tool {
private int numReducers;
private Path inputPath;
private Path outputDir;
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = new Job(conf,"Word Count");
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(WCMapperCombiner.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setCombinerClass(WCReducerCombiner.class);
job.setReducerClass(WCReducerCombiner.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, this.inputPath);
FileOutputFormat.setOutputPath(job, this.outputDir);
job.setNumReduceTasks(this.numReducers);
job.setJarByClass(WordCount.class);
return job.waitForCompletion(true) ? 0 : 1; // this will execute the job
}
public WordCountCombiner (String[] args) {
if (args.length != 3) {
System.out.println("Usage: WordCountCombiner <num_reducers> <input_path> <output_path>");
System.exit(0);
}
this.numReducers = Integer.parseInt(args[0]);
this.inputPath = new Path(args[1]);
this.outputDir = new Path(args[2]);
}
public static void main(String args[]) throws Exception {
int res = ToolRunner.run(new Configuration(), new WordCountCombiner(args), args);
System.exit(res);
}
}
class WCMapperCombiner extends Mapper<LongWritable, Text, Text, LongWritable> {
private Text word = new Text();
private final static LongWritable ONE = new LongWritable(1);
@Override
protected void map(LongWritable offset, Text text, Context context)
throws IOException, InterruptedException {
StringTokenizer iter = new StringTokenizer(text.toString());
while (iter.hasMoreTokens()) {
this.word.set(iter.nextToken());
context.write(this.word , ONE);
}
}
}
class WCReducerCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
@Override
protected void reduce(Text word, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
long accumulator = 0;
for (LongWritable value : values) {
accumulator += value.get();
}
context.write(word, new LongWritable(accumulator));
}
}
\ No newline at end of file
package fr.eurecom.dsg.mapreduce;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Word Count example of MapReduce job. Given a plain text in input, this job
* counts how many occurrences of each word there are in that text and writes
* the result on HDFS.
*
*/
public class WordCountIMC extends Configured implements Tool {
private int numReducers;
private Path inputPath;
private Path outputDir;
@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = new Job(conf,"Word Count");
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(WCIMCMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setReducerClass(WCIMCReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, this.inputPath);
FileOutputFormat.setOutputPath(job, this.outputDir);
job.setNumReduceTasks(this.numReducers);
job.setJarByClass(WordCountIMC.class);
job.waitForCompletion(true);
return 0;
}
public WordCountIMC (String[] args) {
if (args.length != 3) {
System.out.println("Usage: WordCountIMC <num_reducers> <input_path> <output_path>");
System.exit(0);
}
this.numReducers = Integer.parseInt(args[0]);
this.inputPath = new Path(args[1]);
this.outputDir = new Path(args[2]);
}
public static void main(String args[]) throws Exception {
int res = ToolRunner.run(new Configuration(), new WordCountIMC(args), args);
System.exit(res);
}
}
class WCIMCMapper extends Mapper<LongWritable,Text, Text, IntWritable> {
private IntWritable ONE = new IntWritable(1);
private Map<String,IntWritable> H;
private Text textValue = new Text();
@Override
protected void setup(Context context){
this.H = new HashMap<String, IntWritable>() ;
}
@Override
protected void map(LongWritable key, Text text, Context context) {
// * TODO: implement the map method (use context.write to emit results). Use
// the in-memory combiner technique
StringTokenizer iter = new StringTokenizer(text.toString());
while (iter.hasMoreTokens()) {
String word = iter.nextToken();
if (!H.containsKey(word)) {
H.put(word, ONE);
}
else {
H.put(word, new IntWritable((H.get(word).get() + 1)));
}
}
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
Iterator it= this.H.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<String,IntWritable> pair = (Map.Entry)it.next();
textValue.set(pair.getKey().toString());
context.write(textValue,pair.getValue());
}
}
}
class WCIMCReducer extends Reducer<Text,
IntWritable,
Text,
IntWritable> {
private IntWritable writableSum = new IntWritable();
@Override
protected void reduce(Text key,
Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable value : values)
sum += value.get();
writableSum.set(sum);
context.write(key,writableSum);
}
}
\ No newline at end of file
package fr.eurecom.dsg.mapreduce;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
* Unit test for simple App.
*/
public class AppTest
extends TestCase
{
/**
* Create the test case
*
* @param testName name of the test case
*/
public AppTest( String testName )
{
super( testName );
}
/**
* @return the suite of tests being tested
*/
public static Test suite()
{
return new TestSuite( AppTest.class );
}
/**
* Rigourous Test :-)
*/
public void testApp()
{
assertTrue( true );
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment