Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
WordCount_lab
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
dangq
WordCount_lab
Commits
19b01b2c
Commit
19b01b2c
authored
Mar 24, 2015
by
dangq
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
StringtoIntMap.java
parent
584b40fb
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
254 additions
and
93 deletions
+254
-93
.idea/workspace.xml
.idea/workspace.xml
+114
-75
src/main/java/fr/eurecom/dsg/mapreduce/Pair.java
src/main/java/fr/eurecom/dsg/mapreduce/Pair.java
+1
-1
src/main/java/fr/eurecom/dsg/mapreduce/StringToIntMapWritable.java
...java/fr/eurecom/dsg/mapreduce/StringToIntMapWritable.java
+91
-0
src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java
src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java
+48
-17
No files found.
.idea/workspace.xml
View file @
19b01b2c
This diff is collapsed.
Click to expand it.
src/main/java/fr/eurecom/dsg/mapreduce/Pair.java
View file @
19b01b2c
...
...
@@ -94,7 +94,7 @@ public class Pair extends Configured implements Tool {
// TODO: set job input format
Configuration
conf
=
this
.
getConf
();
Job
job
=
new
Job
(
conf
,
"
Word Count
"
);
Job
job
=
new
Job
(
conf
,
"
PAIR
"
);
job
.
setInputFormatClass
(
TextInputFormat
.
class
);
...
...
src/main/java/fr/eurecom/dsg/mapreduce/StringToIntMapWritable.java
0 → 100644
View file @
19b01b2c
package
fr.eurecom.dsg.mapreduce
;
import
java.io.DataInput
;
import
java.io.DataOutput
;
import
java.io.IOException
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.StringTokenizer
;
import
java.util.TreeSet
;
import
org.apache.hadoop.io.Writable
;
/**
 * Very simple (and scholastic) implementation of a Writable associative array
 * from String to Integer.
 *
 * <p>Wire format produced by {@link #write(DataOutput)} and consumed by
 * {@link #readFields(DataInput)}: a 4-byte entry count, followed by that many
 * (key, value) records, each encoded as {@code writeUTF(key)} then
 * {@code writeInt(value)}. Because the record is length-prefixed, it is
 * self-delimiting and keys may contain any character (including spaces and
 * dashes).
 */
public class StringToIntMapWritable implements Writable {

  /** The real associative array backing this Writable. */
  public final Map<String, Integer> counts;

  /**
   * Wraps the given map; the map is used directly, not copied.
   *
   * @param counts the associative array to expose through this Writable
   */
  public StringToIntMapWritable(Map<String, Integer> counts) {
    this.counts = counts;
  }

  /** Creates an instance backed by a new, empty {@link HashMap}. */
  public StringToIntMapWritable() {
    this(new HashMap<String, Integer>());
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + ((counts == null) ? 0 : counts.hashCode());
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    StringToIntMapWritable other = (StringToIntMapWritable) obj;
    if (counts == null) {
      return other.counts == null;
    }
    return counts.equals(other.counts);
  }

  /**
   * Replaces the content of {@link #counts} with the entries read from
   * {@code in}.
   *
   * <p>Warning: for efficiency reasons, Hadoop attempts to re-use old
   * instances of StringToIntMapWritable when reading new records, so the map
   * is cleared first in order to get rid of old data.
   *
   * @param in the stream to read one serialized map from
   * @throws IOException if the stream fails, ends early, or carries a
   *     negative entry count
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    counts.clear();
    int size = in.readInt();
    if (size < 0) {
      throw new IOException("Corrupt StringToIntMapWritable: negative entry count " + size);
    }
    for (int i = 0; i < size; i++) {
      // Read the key before the value: same order as write().
      String word = in.readUTF();
      int count = in.readInt();
      counts.put(word, count);
    }
  }

  /**
   * Serializes {@link #counts} as an entry count followed by the entries.
   *
   * @param out the stream to append the serialized map to
   * @throws IOException if the stream fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    out.writeInt(counts.size());
    for (Map.Entry<String, Integer> entry : counts.entrySet()) {
      out.writeUTF(entry.getKey());
      out.writeInt(entry.getValue());
    }
  }

  /**
   * Renders the map as {@code key-count} pairs, each followed by a single
   * space, with keys in their natural (sorted) order.
   */
  @Override
  public String toString() {
    StringBuilder s = new StringBuilder();
    for (String key : new TreeSet<String>(counts.keySet())) {
      s.append(key).append("-").append(counts.get(key)).append(" ");
    }
    return s.toString();
  }
}
src/main/java/fr/eurecom/dsg/mapreduce/Stripes.java
View file @
19b01b2c
...
...
@@ -5,9 +5,16 @@ import java.io.IOException;
import
org.apache.hadoop.conf.Configuration
;
import
org.apache.hadoop.conf.Configured
;
import
org.apache.hadoop.fs.Path
;
import
org.apache.hadoop.io.IntWritable
;
import
org.apache.hadoop.io.LongWritable
;
import
org.apache.hadoop.io.Text
;
import
org.apache.hadoop.mapreduce.Job
;
import
org.apache.hadoop.mapreduce.Mapper
;
import
org.apache.hadoop.mapreduce.Reducer
;
import
org.apache.hadoop.mapreduce.lib.input.FileInputFormat
;
import
org.apache.hadoop.mapreduce.lib.input.TextInputFormat
;
import
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
;
import
org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
;
import
org.apache.hadoop.util.Tool
;
import
org.apache.hadoop.util.ToolRunner
;
...
...
@@ -22,16 +29,28 @@ public class Stripes extends Configured implements Tool {
public int run(String[] args) throws Exception {
  // Configures the STRIPES job (formats, mapper/reducer classes and their
  // key/value types, input/output paths, reducer count), submits it and
  // blocks until it finishes. Returns 0 when the job succeeds, 1 otherwise.
  Configuration conf = this.getConf();
  Job job = new Job(conf, "STRIPES");

  // Job input format.
  job.setInputFormatClass(TextInputFormat.class);

  // Map class and map output types; these must agree with the type
  // parameters declared by StripesMapper (TextPair key, IntWritable value).
  job.setMapperClass(StripesMapper.class);
  job.setMapOutputKeyClass(TextPair.class);
  job.setMapOutputValueClass(IntWritable.class);

  // Reduce class and final output types; these must agree with the type
  // parameters declared by StripesReducer (TextPair key, IntWritable value).
  job.setReducerClass(StripesReducer.class);
  job.setOutputKeyClass(TextPair.class);
  job.setOutputValueClass(IntWritable.class);

  // Job output format.
  job.setOutputFormatClass(TextOutputFormat.class);

  // Input file (from HDFS) and output directory for the results (to HDFS).
  FileInputFormat.addInputPath(job, this.inputPath);
  FileOutputFormat.setOutputPath(job, this.outputDir);

  // Number of reducers.
  job.setNumReduceTasks(this.numReducers);

  // Locate the job jar through this job's own driver class.
  job.setJarByClass(Stripes.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
...
...
@@ -53,29 +72,41 @@ public class Stripes extends Configured implements Tool {
}
class
StripesMapper
extends
Mapper
<
Object
,
// TODO: change Object to input key type
Objec
t
,
// TODO: change Object to input value type
Object
,
// TODO: change Object to output key type
Object
>
{
// TODO: change Object to output value type
extends
Mapper
<
LongWritable
,
// TODO: change Object to input key type
Tex
t
,
// TODO: change Object to input value type
TextPair
,
// TODO: change Object to output key type
IntWritable
>
{
// TODO: change Object to output value type
@Override
public
void
map
(
Object
key
,
// TODO: change Object to input key type
Objec
t
value
,
// TODO: change Object to input value type
public
void
map
(
LongWritable
key
,
// TODO: change Object to input key type
Tex
t
value
,
// TODO: change Object to input value type
Context
context
)
throws
java
.
io
.
IOException
,
InterruptedException
{
String
line
=
this
.
toString
();
line
=
line
.
replaceAll
(
"[^a-zA-Z0-9_]+"
,
" "
);
line
=
line
.
replaceAll
(
"^\\s+"
,
""
);
String
[]
words
=
line
.
split
(
"\\s+"
);
for
(
int
i
=
0
;
i
<
words
.
length
-
1
;
i
++)
{
String
first
=
words
[
i
];
for
(
int
j
=
0
;
j
<
words
.
length
-
1
;
j
++)
{
if
(
i
!=
j
)
{
String
second
=
words
[
i
+
1
];
context
.
write
(
new
TextPair
(
first
,
second
),
new
IntWritable
(
1
));
}
}
}
// TODO: implement map method
}
}
class
StripesReducer
extends
Reducer
<
Object
,
// TODO: change Object to input key type
Object
,
// TODO: change Object to input value type
Object
,
// TODO: change Object to output key type
Object
>
{
// TODO: change Object to output value type
extends
Reducer
<
TextPair
,
// TODO: change Object to input key type
IntWritable
,
// TODO: change Object to input value type
TextPair
,
// TODO: change Object to output key type
IntWritable
>
{
// TODO: change Object to output value type
@Override
public
void
reduce
(
Object
key
,
// TODO: change Object to input key type
Iterable
<
Object
>
values
,
// TODO: change Object to input value type
public
void
reduce
(
TextPair
key
,
// TODO: change Object to input key type
Iterable
<
IntWritable
>
values
,
// TODO: change Object to input value type
Context
context
)
throws
IOException
,
InterruptedException
{
// TODO: implement the reduce method
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment