Commit 9514f05b authored by YUSHIQIAN

union

parent e63b06a1
......@@ -10,17 +10,17 @@ A = LOAD '$input' using PigStorage(' ') AS (ip_c:chararray, …, ip_s:chararray,
ip = FOREACH A GENERATE ip_c, ip_s
-- Group by client IP
B = GROUP A BY ip_c;
ip_c = GROUP A BY ip_c;
-- Group by server IP
C = GROUP A BY ip_s;
ip_s = GROUP A BY ip_s;
-- Count the number of TCP connections per IP, irrespective of whether it is a client or server IP
twohop = JOIN C by $1, good_datasetB by $0;
ips = UNION ip_c, ip_s;
-- Generate the output data
C = FOREACH B GENERATE group, COUNT(A);
C = FOREACH ips GENERATE group, COUNT(A);
-- Store the output (and start to execute the script)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment