Commit 96416d02 authored by YUSHIQIAN

ex1, ex1b

parent b570a41b
-- ex1: count the input records (TCP connections) seen for each client IP.
-- Parameters (override on the command line with -param):
--   input  : space-separated log file to read
--   output : directory the result is written to
-- NOTE(review): the default output path starts with '/' while the input path is
-- relative, and 'tsat' may be a typo for 'tstat' -- confirm before relying on it.

-- Set default parallel (applies to the GROUP below)
SET default_parallel 20;
%default input './local-input/tstat-sample.txt'
%default output '/local-output/tsat/ex1'

-- Load input data from local input directory.
-- The load is schema-less on purpose: the previous AS clause contained an
-- ellipsis placeholder in place of the remaining fields, which does not parse.
raw = LOAD '$input' USING PigStorage(' ');

-- Keep only the first field, which the previous schema named ip_c (client IP).
A = FOREACH raw GENERATE (chararray)$0 AS ip_c;

-- Group by client IP
B = GROUP A BY ip_c;

-- Generate the output data: one (client IP, record count) tuple per group
C = FOREACH B GENERATE group, COUNT(A);

-- Store the output (and start to execute the script)
STORE C INTO '$output';
\ No newline at end of file
-- ex1b: count the number of TCP connections per IP address, irrespective of
-- whether the address appears as the client or as the server of the connection.
-- Parameters (override on the command line with -param):
--   input  : space-separated log file to read
--   output : directory the result is written to
-- NOTE(review): the default output path starts with '/' while the input path is
-- relative, and 'tsat' may be a typo for 'tstat' -- confirm before relying on it.

-- Set default parallel (applies to the GROUP below)
SET default_parallel 20;
%default input './local-input/tstat-sample.txt'
%default output '/local-output/tsat/ex1b'

-- Load input data from local input directory.
-- The load is schema-less on purpose: the previous AS clause contained ellipsis
-- placeholders in place of the remaining fields, which does not parse.
raw = LOAD '$input' USING PigStorage(' ');

-- Keep only the two address fields.
-- The client IP is the first field, as in the previous schema.
-- NOTE(review): the server IP is assumed to be the 15th field, following the
-- Tstat log_tcp_complete column layout -- confirm against the actual input.
ip = FOREACH raw GENERATE (chararray)$0 AS ip_c, (chararray)$14 AS ip_s;

-- Turn every connection into two single-field tuples, one per endpoint, so
-- that a single GROUP can count both roles together.
-- NOTE(review): a connection whose client and server address are identical
-- contributes two to that address's count -- confirm this is intended.
clients = FOREACH ip GENERATE ip_c AS addr;
servers = FOREACH ip GENERATE ip_s AS addr;
A = UNION clients, servers;

-- Group by IP address (client or server)
B = GROUP A BY addr;

-- Generate the output data: one (IP, connection count) tuple per group
C = FOREACH B GENERATE group, COUNT(A);

-- Store the output (and start to execute the script)
STORE C INTO '$output';
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment