Commit 994a839a authored by leroyq

second commit

parent 228d7945
-- Word count: read a local text file and emit one <word, count> record per
-- distinct word.

-- Read the raw input. No loader or schema is specified, so Pig's default
-- loader is used and fields are addressed by position.
-- NOTE(review): the default loader splits records on tab, so $0 below is only
-- the first tab-separated field -- confirm the input contains no tabs.
raw_lines = LOAD './sample-input/WORD_COUNT/sample.txt';

-- Split the first field of every record into tokens, one output row per token.
tokens = FOREACH raw_lines GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;

-- Keep only tokens that consist entirely of word characters.
clean_words = FILTER tokens BY word MATCHES '\\w+';

-- Gather identical words together so each group can be counted.
by_word = GROUP clean_words BY word;

-- For each distinct word, emit the word and its number of occurrences.
word_counts = FOREACH by_word GENERATE group, COUNT(clean_words);

-- Write the result to the local output directory.
STORE word_counts INTO './local-output/WORD_COUNT/';
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment