Commit e1e0795d authored by YUSHIQIAN
Browse files

wordcount imc

parent 6c1bd312
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="" />
</component>
</project>
\ No newline at end of file
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-hdfs:2.5.0-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:11.0.2" level="project" />
<orderEntry type="library" name="Maven: org.mortbay.jetty:jetty:6.1.26.cloudera.4" level="project" />
<orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26.cloudera.4" level="project" />
<orderEntry type="library" name="Maven: com.sun.jersey:jersey-core:1.9" level="project" />
<orderEntry type="library" name="Maven: com.sun.jersey:jersey-server:1.9" level="project" />
<orderEntry type="library" name="Maven: asm:asm:3.1" level="project" />
<orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.4" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
<orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" />
<orderEntry type="library" name="Maven: commons-daemon:commons-daemon:1.0.13" level="project" />
<orderEntry type="library" name="Maven: javax.servlet.jsp:jsp-api:2.1" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.5.0" level="project" />
<orderEntry type="library" name="Maven: javax.servlet:servlet-api:2.5" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.8.8" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.8.8" level="project" />
<orderEntry type="library" name="Maven: tomcat:jasper-runtime:5.5.23" level="project" />
<orderEntry type="library" name="Maven: xmlenc:xmlenc:0.52" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty:3.6.2.Final" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-auth:2.5.0-cdh5.3.0" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.5" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.2.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.2.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20" level="project" />
<orderEntry type="library" name="Maven: org.apache.directory.api:api-util:1.0.0-M20" level="project" />
<orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.5-cdh5.3.0" level="project" />
<orderEntry type="library" name="Maven: jline:jline:0.9.94" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.6.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-common:2.5.0-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.5.0-cdh5.3.2" level="project" />
<orderEntry type="module-library">
<library name="Maven: jdk.tools:jdk.tools:1.7">
<CLASSES>
<root url="jar:///usr/java/lib/tools.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
<orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" />
<orderEntry type="library" name="Maven: commons-net:commons-net:3.1" level="project" />
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
<orderEntry type="library" name="Maven: com.sun.jersey:jersey-json:1.9" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jettison:jettison:1.1" level="project" />
<orderEntry type="library" name="Maven: com.sun.xml.bind:jaxb-impl:2.2.3-1" level="project" />
<orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.2" level="project" />
<orderEntry type="library" name="Maven: javax.xml.stream:stax-api:1.0-2" level="project" />
<orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.8.3" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-xc:1.8.3" level="project" />
<orderEntry type="library" name="Maven: tomcat:jasper-compiler:5.5.23" level="project" />
<orderEntry type="library" name="Maven: commons-el:commons-el:1.0" level="project" />
<orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.9.0" level="project" />
<orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:0.4" level="project" />
<orderEntry type="library" name="Maven: commons-configuration:commons-configuration:1.6" level="project" />
<orderEntry type="library" name="Maven: commons-digester:commons-digester:1.8" level="project" />
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.7.0" level="project" />
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils-core:1.8.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.6-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: com.thoughtworks.paranamer:paranamer:2.3" level="project" />
<orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.5" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.4" level="project" />
<orderEntry type="library" name="Maven: com.jcraft:jsch:0.1.42" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.6.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.6.0" level="project" />
<orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:1.3.9" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" />
<orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-core:2.5.0-mr1-cdh5.3.2" level="project" />
<orderEntry type="library" name="Maven: hsqldb:hsqldb:1.8.0.10" level="project" />
<orderEntry type="library" name="Maven: org.eclipse.jdt:core:3.1.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.10" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
</component>
</module>
\ No newline at end of file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!-- Maven build for the word-count MapReduce job (single jar module). -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>fr.eurecom.dsg.mapreduce</groupId>
  <artifactId>Wordcount</artifactId>
  <version>IMC</version>
  <packaging>jar</packaging>
  <name>Wordcount</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Single source of truth for the CDH release, so all Hadoop artifacts stay aligned. -->
    <hadoop.version>2.5.0-cdh5.3.2</hadoop.version>
    <hadoop.mr1.version>2.5.0-mr1-cdh5.3.2</hadoop.mr1.version>
  </properties>

  <!-- CDH-versioned artifacts are published in Cloudera's repository; declared here so the
       build does not depend on a per-user settings.xml. -->
  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>

  <dependencyManagement>
    <dependencies>
      <dependency>
        <groupId>jdk.tools</groupId>
        <artifactId>jdk.tools</artifactId>
        <version>1.7</version>
      </dependency>
      <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
      </dependency>
      <dependency>
        <!-- Previously pinned to cdh5.3.0 while its siblings used cdh5.3.2; now aligned. -->
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-auth</artifactId>
        <version>${hadoop.version}</version>
      </dependency>
      <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
      </dependency>
      <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-core</artifactId>
        <version>${hadoop.mr1.version}</version>
      </dependency>
      <dependency>
        <!-- Managed only: not referenced by the dependencies section below. -->
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>${hadoop.version}</version>
      </dependency>
      <dependency>
        <!-- Managed only: the module itself depends on junit:junit (see below). -->
        <groupId>junit</groupId>
        <artifactId>junit-dep</artifactId>
        <version>4.8.2</version>
      </dependency>
    </dependencies>
  </dependencyManagement>

  <!-- Versions of the Hadoop artifacts are inherited from dependencyManagement above. -->
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-auth</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <!-- Compile for Java 7 source and bytecode level. -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.1</version>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
package fr.eurecom.dsg.mapreduce;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Word-count MapReduce job whose mapper pre-aggregates counts in memory
 * before emitting them, so each distinct word of an input record is written
 * once with its local count instead of once per occurrence.
 *
 * <p>Command-line arguments: {@code <num_reducers> <input_path> <output_path>}.
 */
public class WordCountIMC extends Configured implements Tool {

    /**
     * Mapper that counts the words of one input line in a local map and then
     * emits one {@code (word, count)} pair per distinct word of that line.
     */
    static class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused for every emitted pair to avoid allocating a Writable per write.
        private final Text outKey = new Text();
        private final IntWritable outCount = new IntWritable();

        /**
         * Tokenizes {@code value} on whitespace, aggregates the per-word counts
         * of this record, and writes them to {@code context}.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context sink for the aggregated {@code (word, count)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // A map local to this call: nothing leaks between records and no
            // remove-while-enumerating is needed to reset the state.
            Map<String, Integer> counts = new HashMap<String, Integer>();

            for (String word : value.toString().split("\\s+")) {
                // split() yields an empty token for blank lines and for lines that
                // start with whitespace; that is not a word and must not be counted.
                if (word.isEmpty()) {
                    continue;
                }
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }

            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                outKey.set(entry.getKey());
                outCount.set(entry.getValue());
                context.write(outKey, outCount);
            }
        }
    }

    /**
     * Reducer that sums all partial counts received for a word.
     */
    static class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused output value holder.
        private IntWritable writableSum = new IntWritable();

        /**
         * Adds up {@code values} and writes {@code (key, total)}.
         *
         * @param key     the word
         * @param values  partial counts for the word
         * @param context sink for the final {@code (word, total)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            writableSum.set(sum);
            context.write(key, writableSum);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} number of reduce tasks, {@code args[1]} input
     *             path, {@code args[2]} output path; extra arguments are ignored
     * @return 0 if the job completed successfully, 1 if it failed, 2 if too few
     *         arguments were supplied
     * @throws Exception if the job cannot be set up or submitted, or if
     *                   {@code args[0]} is not an integer
     */
    @Override
    public int run(String[] args) throws Exception {
        // Fail with a usage message rather than an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 3) {
            System.err.println("Usage: WordCountIMC <num_reducers> <input_path> <output_path>");
            return 2;
        }

        Configuration conf = this.getConf();
        Job job = new Job(conf, "Word Count IMC");
        job.setJarByClass(WordCountIMC.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(WCReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.setNumReduceTasks(Integer.parseInt(args[0]));

        // Report the real outcome instead of always claiming success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Entry point: runs the tool through {@link ToolRunner} and exits with the
     * status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, see {@link #run(String[])}
     * @throws Exception if the tool throws
     */
    public static void main(String args[]) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new WordCountIMC(), args));
    }
}
\ No newline at end of file
package fr.eurecom.dsg.mapreduce;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * Placeholder {@code junit.framework}-style test case for this module.
 */
public class AppTest extends TestCase {

    /**
     * Builds the test case.
     *
     * @param testName name of the test case
     */
    public AppTest(String testName) {
        super(testName);
    }

    /**
     * @return a suite built from this class
     */
    public static Test suite() {
        return new TestSuite(AppTest.class);
    }

    /**
     * Trivial check that always passes.
     */
    public void testApp() {
        assertTrue(true);
    }
}
#Generated by Maven
#Fri Mar 27 17:42:49 CET 2015
version=IMC
groupId=fr.eurecom.dsg.mapreduce
artifactId=Wordcount
<?xml version="1.0" encoding="UTF-8" ?>
<testsuite failures="0" time="0.027" errors="0" skipped="0" tests="1" name="fr.eurecom.dsg.mapreduce.AppTest">
<properties>
<property name="java.runtime.name" value="OpenJDK Runtime Environment"/>
<property name="sun.boot.library.path" value="/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64"/>
<property name="java.vm.version" value="24.65-b04"/>
<property name="java.vm.vendor" value="Oracle Corporation"/>
<property name="java.vendor.url" value="http://java.oracle.com/"/>
<property name="path.separator" value=":"/>
<property name="guice.disable.misplaced.annotation.check" value="true"/>
<property name="java.vm.name" value="OpenJDK 64-Bit Server VM"/>
<property name="file.encoding.pkg" value="sun.io"/>
<property name="user.country" value="US"/>
<property name="sun.java.launcher" value="SUN_STANDARD"/>
<property name="sun.os.patch.level" value="unknown"/>
<property name="java.vm.specification.name" value="Java Virtual Machine Specification"/>
<property name="user.dir" value="/homes/yus/cloud_computing_lab1/WordCountIMC"/>
<property name="java.runtime.version" value="1.7.0_65-b32"/>
<property name="java.awt.graphicsenv" value="sun.awt.X11GraphicsEnvironment"/>
<property name="java.endorsed.dirs" value="/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/endorsed"/>
<property name="os.arch" value="amd64"/>
<property name="java.io.tmpdir" value="/tmp"/>
<property name="line.separator" value="
"/>
<property name="java.vm.specification.vendor" value="Oracle Corporation"/>
<property name="os.name" value="Linux"/>
<property name="classworlds.conf" value="/usr/share/maven/bin/m2.conf"/>
<property name="sun.jnu.encoding" value="UTF-8"/>
<property name="java.library.path" value="/usr/local/lib:/usr/lib:/lib::/usr/java/packages/lib/amd64:/usr/lib/x86_64-linux-gnu/jni:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/usr/lib/jni:/lib:/usr/lib"/>
<property name="java.specification.name" value="Java Platform API Specification"/>
<property name="java.class.version" value="51.0"/>
<property name="sun.management.compiler" value="HotSpot 64-Bit Tiered Compilers"/>
<property name="os.version" value="3.13.0-35-generic"/>
<property name="user.home" value="/homes/yus"/>
<property name="user.timezone" value="Europe/Paris"/>
<property name="java.awt.printerjob" value="sun.print.PSPrinterJob"/>
<property name="file.encoding" value="UTF-8"/>
<property name="java.specification.version" value="1.7"/>
<property name="user.name" value="yus"/>
<property name="java.class.path" value="/usr/share/maven/boot/plexus-classworlds-2.x.jar"/>
<property name="java.vm.specification.version" value="1.7"/>
<property name="sun.arch.data.model" value="64"/>
<property name="java.home" value="/usr/lib/jvm/java-7-openjdk-amd64/jre"/>
<property name="sun.java.command" value="org.codehaus.plexus.classworlds.launcher.Launcher package"/>
<property name="java.specification.vendor" value="Oracle Corporation"/>
<property name="user.language" value="en"/>
<property name="awt.toolkit" value="sun.awt.X11.XToolkit"/>
<property name="java.vm.info" value="mixed mode"/>
<property name="java.version" value="1.7.0_65"/>
<property name="java.ext.dirs" value="/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/ext:/usr/java/packages/lib/ext"/>
<property name="securerandom.source" value="file:/dev/./urandom"/>
<property name="sun.boot.class.path" value="/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/resources.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/rt.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/sunrsasign.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/jsse.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/jce.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/charsets.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/rhino.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/jfr.jar:/usr/lib/jvm/java-7-openjdk-amd64/jre/classes"/>
<property name="java.vendor" value="Oracle Corporation"/>
<property name="maven.home" value="/usr/share/maven"/>
<property name="file.separator" value="/"/>
<property name="java.vendor.url.bug" value="http://bugreport.sun.com/bugreport/"/>
<property name="sun.cpu.endian" value="little"/>
<property name="sun.io.unicode.encoding" value="UnicodeLittle"/>
<property name="sun.desktop" value="gnome"/>
<property name="sun.cpu.isalist" value=""/>
</properties>
<testcase time="0.004" classname="fr.eurecom.dsg.mapreduce.AppTest" name="testApp"/>
</testsuite>
\ No newline at end of file
-------------------------------------------------------------------------------
Test set: fr.eurecom.dsg.mapreduce.AppTest
-------------------------------------------------------------------------------
Tests run: 1, Failures: 0, Errors: 0, Skipped: 0, Time elapsed: 0.034 sec
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment