Commit 19b01b2c authored by dangq's avatar dangq

StringtoIntMap.java

parent 584b40fb
......@@ -2,9 +2,8 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="30ff8914-3a60-4b29-8ec6-4036cadfcba8" name="Default" comment="">
<change type="DELETED" beforePath="$PROJECT_DIR$/WordCount.iml" afterPath="" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Pair.java" afterPath="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Pair.java" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/misc.xml" afterPath="$PROJECT_DIR$/.idea/misc.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java" afterPath="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
</list>
<ignored path="WordCount.iws" />
......@@ -36,42 +35,18 @@
</component>
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="Pair.java" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Pair.java">
<file leaf-file-name="StringToIntMapWritable.java" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/StringToIntMapWritable.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="1460" max-vertical-offset="1890">
<caret line="137" column="0" selection-start-line="137" selection-start-column="0" selection-end-line="137" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="Stripes.java" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="1185">
<caret line="24" column="18" selection-start-line="24" selection-start-column="14" selection-end-line="24" selection-end-column="18" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="TextPair.java" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/TextPair.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.067567565" vertical-offset="1055" max-vertical-offset="2475">
<caret line="83" column="5" selection-start-line="83" selection-start-column="5" selection-end-line="83" selection-end-column="5" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="pom.xml" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/pom.xml">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="1056" max-vertical-offset="1485">
<caret line="83" column="62" selection-start-line="83" selection-start-column="62" selection-end-line="83" selection-end-column="62" />
<folding />
<state vertical-scroll-proportion="2.4596775" vertical-offset="225" max-vertical-offset="1455">
<caret line="76" column="40" selection-start-line="76" selection-start-column="40" selection-end-line="76" selection-end-column="40" />
<folding>
<element signature="imports" expanded="true" />
<element signature="e#687#688#0" expanded="true" />
<element signature="e#723#724#0" expanded="true" />
<element signature="e#761#762#0" expanded="true" />
<element signature="e#813#814#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
......@@ -91,6 +66,8 @@
<option value="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/WordCountCombiner.java" />
<option value="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/TextPair.java" />
<option value="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Pair.java" />
<option value="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java" />
<option value="$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/StringToIntMapWritable.java" />
</list>
</option>
</component>
......@@ -136,10 +113,10 @@
</treeState>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="75" />
<option name="y" value="64" />
<option name="width" value="1281" />
<option name="height" value="704" />
<option name="x" value="683" />
<option name="y" value="39" />
<option name="width" value="683" />
<option name="height" value="729" />
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="false">
<OptionsSetting value="true" id="Add" />
......@@ -164,8 +141,8 @@
<sortByType />
</navigator>
<panes>
<pane id="PackagesPane" />
<pane id="Scope" />
<pane id="PackagesPane" />
<pane id="ProjectPane">
<subPane>
<PATH>
......@@ -407,19 +384,6 @@
</ExternalSystemSettings>
<method />
</configuration>
<configuration default="true" type="Applet" factoryName="Applet">
<module name="" />
<option name="MAIN_CLASS_NAME" />
<option name="HTML_FILE_NAME" />
<option name="HTML_USED" value="false" />
<option name="WIDTH" value="400" />
<option name="HEIGHT" value="300" />
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy" />
<option name="VM_PARAMETERS" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
<option name="ALTERNATIVE_JRE_PATH" />
<method />
</configuration>
<configuration default="true" type="TestNG" factoryName="TestNG">
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
<module name="" />
......@@ -448,6 +412,19 @@
<listeners />
<method />
</configuration>
<configuration default="true" type="Applet" factoryName="Applet">
<module name="" />
<option name="MAIN_CLASS_NAME" />
<option name="HTML_FILE_NAME" />
<option name="HTML_USED" value="false" />
<option name="WIDTH" value="400" />
<option name="HEIGHT" value="300" />
<option name="POLICY_FILE" value="$APPLICATION_HOME_DIR$/bin/appletviewer.policy" />
<option name="VM_PARAMETERS" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
<option name="ALTERNATIVE_JRE_PATH" />
<method />
</configuration>
<configuration default="true" type="Application" factoryName="Application">
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
<option name="MAIN_CLASS_NAME" />
......@@ -531,14 +508,14 @@
<servers />
</component>
<component name="ToolWindowManager">
<frame x="75" y="64" width="1281" height="704" extended-state="0" />
<editor active="false" />
<frame x="683" y="39" width="683" height="729" extended-state="4" />
<editor active="true" />
<layout>
<window_info id="Palette&#9;" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Designer" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.32597622" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.32459018" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Palette" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
......@@ -549,15 +526,15 @@
<window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32930514" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="SLIDING" type="SLIDING" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Application Servers" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.24980484" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.042459738" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32924962" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Messages" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32924962" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.32924962" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
</layout>
</component>
<component name="Vcs.Log.UiProperties">
......@@ -594,6 +571,52 @@
<option name="FILTER_TARGETS" value="false" />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Pair.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="180" max-vertical-offset="1890">
<caret line="42" column="0" selection-start-line="32" selection-start-column="8" selection-end-line="42" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="905" max-vertical-offset="1545">
<caret line="83" column="7" selection-start-line="83" selection-start-column="7" selection-end-line="83" selection-end-column="7" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/StringToIntMapWritable.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="720">
<caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="imports" expanded="true" />
<element signature="e#687#688#0" expanded="true" />
<element signature="e#723#724#0" expanded="true" />
<element signature="e#761#762#0" expanded="true" />
<element signature="e#813#814#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/TextPair.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="1145" max-vertical-offset="2475">
<caret line="110" column="0" selection-start-line="110" selection-start-column="0" selection-end-line="110" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/pom.xml">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="1056" max-vertical-offset="1485">
<caret line="83" column="62" selection-start-line="83" selection-start-column="62" selection-end-line="83" selection-end-column="62" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Pair.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="105" max-vertical-offset="1665">
......@@ -654,9 +677,6 @@
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="954" max-vertical-offset="1425">
<caret line="81" column="0" selection-start-line="57" selection-start-column="8" selection-end-line="81" selection-end-column="0" />
<folding>
<element signature="imports" expanded="false" />
</folding>
</state>
</provider>
</entry>
......@@ -754,9 +774,6 @@
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.34146342" vertical-offset="954" max-vertical-offset="1425">
<caret line="72" column="0" selection-start-line="72" selection-start-column="0" selection-end-line="72" selection-end-column="0" />
<folding>
<element signature="imports" expanded="false" />
</folding>
</state>
</provider>
</entry>
......@@ -770,9 +787,17 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/TextPair.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="-0.08064516" vertical-offset="585" max-vertical-offset="2475">
<caret line="44" column="12" selection-start-line="44" selection-start-column="12" selection-end-line="44" selection-end-column="12" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/pom.xml">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="1056" max-vertical-offset="1485">
<state vertical-scroll-proportion="0.5080645" vertical-offset="1056" max-vertical-offset="1485">
<caret line="83" column="62" selection-start-line="83" selection-start-column="62" selection-end-line="83" selection-end-column="62" />
<folding />
</state>
......@@ -780,27 +805,41 @@
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Pair.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="1460" max-vertical-offset="1890">
<caret line="137" column="0" selection-start-line="137" selection-start-column="0" selection-end-line="137" selection-end-column="0" />
<state vertical-scroll-proportion="0.6048387" vertical-offset="180" max-vertical-offset="1890">
<caret line="44" column="5" selection-start-line="44" selection-start-column="5" selection-end-line="44" selection-end-column="5" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="1185">
<caret line="24" column="18" selection-start-line="24" selection-start-column="14" selection-end-line="24" selection-end-column="18" />
<state vertical-scroll-proportion="0.4327957" vertical-offset="1114" max-vertical-offset="1545">
<caret line="101" column="6" selection-start-line="101" selection-start-column="6" selection-end-line="101" selection-end-column="6" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/TextPair.java">
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/hadoop/hadoop-common/2.5.0-cdh5.3.2/hadoop-common-2.5.0-cdh5.3.2.jar!/org/apache/hadoop/io/Writable.class">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.067567565" vertical-offset="1055" max-vertical-offset="2475">
<caret line="83" column="5" selection-start-line="83" selection-start-column="5" selection-end-line="83" selection-end-column="5" />
<state vertical-scroll-proportion="0.33333334" vertical-offset="0" max-vertical-offset="360">
<caret line="15" column="17" selection-start-line="15" selection-start-column="17" selection-end-line="15" selection-end-column="17" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/fr/eurecom/dsg/mapreduce/StringToIntMapWritable.java">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="2.4596775" vertical-offset="225" max-vertical-offset="1455">
<caret line="76" column="40" selection-start-line="76" selection-start-column="40" selection-end-line="76" selection-end-column="40" />
<folding>
<element signature="imports" expanded="true" />
<element signature="e#687#688#0" expanded="true" />
<element signature="e#723#724#0" expanded="true" />
<element signature="e#761#762#0" expanded="true" />
<element signature="e#813#814#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
......@@ -94,7 +94,7 @@ public class Pair extends Configured implements Tool {
// TODO: set job input format
Configuration conf = this.getConf();
Job job = new Job(conf,"Word Count");
Job job = new Job(conf,"PAIR");
job.setInputFormatClass(TextInputFormat.class);
......
package fr.eurecom.dsg.mapreduce;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeSet;
import org.apache.hadoop.io.Writable;
/**
 * Very simple (and scholastic) implementation of a {@link Writable} associative array
 * from {@code String} to {@code int}.
 *
 * <p>Wire format produced by {@link #write(DataOutput)} and consumed by
 * {@link #readFields(DataInput)}: a 4-byte entry count, followed for every entry by the
 * key (as written by {@link DataOutput#writeUTF(String)}) and the value (4-byte int).
 * The record is therefore self-delimiting: reading it never consumes bytes that belong
 * to whatever follows it in the stream, and keys may contain spaces, dashes or
 * non-ASCII characters.
 */
public class StringToIntMapWritable implements Writable {

    /** The real associative array; public so callers can read and update it directly. */
    public final Map<String, Integer> counts;

    /**
     * Wraps the given map (no copy is made).
     *
     * @param counts the map backing this writable
     */
    public StringToIntMapWritable(Map<String, Integer> counts) {
        this.counts = counts;
    }

    /** Creates an instance backed by a new, empty {@link HashMap}. */
    public StringToIntMapWritable() {
        this(new HashMap<String, Integer>());
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        return prime + ((counts == null) ? 0 : counts.hashCode());
    }

    /**
     * Two instances are equal when they are of the same class and their backing maps are
     * equal (or both {@code null}).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        StringToIntMapWritable other = (StringToIntMapWritable) obj;
        if (counts == null) {
            return other.counts == null;
        }
        return counts.equals(other.counts);
    }

    /**
     * Deserializes one record, replacing the current content of {@link #counts}.
     *
     * <p>Warning: for efficiency reasons, Hadoop attempts to re-use old instances of
     * {@code StringToIntMapWritable} when reading new records, so the map is cleared
     * first in order to get rid of old data.
     *
     * @param in the stream to read from
     * @throws IOException if the stream fails, ends early, or announces a negative size
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        counts.clear();
        int size = in.readInt();
        if (size < 0) {
            throw new IOException("Negative entry count: " + size);
        }
        for (int i = 0; i < size; i++) {
            String word = in.readUTF();
            int count = in.readInt();
            counts.put(word, count);
        }
    }

    /**
     * Serializes the map as an entry count followed by (key, value) pairs.
     *
     * @param out the stream to write to
     * @throws IOException if the stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(counts.size());
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            out.writeUTF(entry.getKey());
            out.writeInt(entry.getValue());
        }
    }

    /**
     * Renders the entries as {@code key-value} tokens, each followed by a single space,
     * in ascending key order.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        for (String key : new TreeSet<String>(counts.keySet())) {
            text.append(key).append('-').append(counts.get(key)).append(' ');
        }
        return text.toString();
    }
}
......@@ -5,9 +5,16 @@ import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
......@@ -22,16 +29,28 @@ public class Stripes extends Configured implements Tool {
/**
 * Configures and runs the STRIPES job, blocking until it finishes.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "STRIPES");

    // Input format.
    job.setInputFormatClass(TextInputFormat.class);

    // Map class and its output types; these must match the type arguments declared
    // by StripesMapper, otherwise the job fails at runtime with a type mismatch.
    job.setMapperClass(StripesMapper.class);
    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Reduce class and its output types, matching StripesReducer's type arguments.
    job.setReducerClass(StripesReducer.class);
    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(IntWritable.class);

    // Output format.
    job.setOutputFormatClass(TextOutputFormat.class);

    // Input file and output directory.
    FileInputFormat.addInputPath(job, this.inputPath);
    FileOutputFormat.setOutputPath(job, this.outputDir);

    // Number of reducers.
    job.setNumReduceTasks(this.numReducers);

    // Locate the job jar through this driver class rather than an unrelated one.
    job.setJarByClass(Stripes.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
......@@ -53,29 +72,41 @@ public class Stripes extends Configured implements Tool {
}
class StripesMapper
extends Mapper<Object, // TODO: change Object to input key type
Object, // TODO: change Object to input value type
Object, // TODO: change Object to output key type
Object> { // TODO: change Object to output value type
extends Mapper<LongWritable, // TODO: change Object to input key type
Text, // TODO: change Object to input value type
TextPair, // TODO: change Object to output key type
IntWritable> { // TODO: change Object to output value type
@Override
public void map(Object key, // TODO: change Object to input key type
Object value, // TODO: change Object to input value type
public void map(LongWritable key, // TODO: change Object to input key type
Text value, // TODO: change Object to input value type
Context context)
throws java.io.IOException, InterruptedException {
// TODO: implement map method
String line = this.toString();
line = line.replaceAll("[^a-zA-Z0-9_]+", " ");
line = line.replaceAll("^\\s+", "");
String[] words = line.split("\\s+");
for (int i = 0; i < words.length - 1; i++) {
String first = words[i];
for (int j = 0; j < words.length - 1; j++) {
if (i != j) {
String second = words[i + 1];
context.write(new TextPair(first, second), new IntWritable(1));
}
}
}
// TODO: implement map method
}
}
class StripesReducer
extends Reducer<Object, // TODO: change Object to input key type
Object, // TODO: change Object to input value type
Object, // TODO: change Object to output key type
Object> { // TODO: change Object to output value type
extends Reducer<TextPair, // TODO: change Object to input key type
IntWritable, // TODO: change Object to input value type
TextPair, // TODO: change Object to output key type
IntWritable> { // TODO: change Object to output value type
@Override
public void reduce(Object key, // TODO: change Object to input key type
Iterable<Object> values, // TODO: change Object to input value type
public void reduce(TextPair key, // TODO: change Object to input key type
Iterable<IntWritable> values, // TODO: change Object to input value type
Context context) throws IOException, InterruptedException {
// TODO: implement the reduce method
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment