-- TODO: load the input dataset, located in ./local-input/OSN/tw.txt
-- Pass parameters on the command line with -p (one -p per name=value pair) and point to the script with -f:
-- pig -f tw-join.pig -p input=/data/TWITTER/twitter_graph2.txt
-- pig -f tw-join.pig -p input=/data/TWITTER/twitter_graph2.txt -p output=/output/OSN/twj/
-- For local testing with the default values, you'd just run it without any -p or -f flags:
-- pig -x local tw-join.pig
-- pig -x local ./sample-solutions/OSN/tw-join.pig
-- Script parameters. Each default below is used unless overridden on the
-- command line with -p name=value.
%default input './local-input/OSN/tw.txt'
-- NOTE(review): this default is an absolute path, while the input default is
-- relative to the working directory; confirm that writing under the
-- filesystem root is intended for local runs.
%default output '/local-output/OSN/twj/'

-- Default reducer count for operators that carry no explicit PARALLEL clause.
SET default_parallel 20;

-- Load the input as two long columns per record, using the default loader.
-- NOTE(review): presumably each record is a follower edge between two user
-- ids (id, fr); confirm the column meaning against the dataset description.
datasetA = LOAD '$input' AS (id:long, fr:long);
......@@ -29,4 +30,4 @@ d_result = DISTINCT p_result;
-- Drop self-loops: keep only rows whose first two fields differ.
-- (Such rows can appear, for example, when users 12 and 13 follow each other.)
result = FILTER d_result BY $0 != $1;

-- Write the result once, to the location given by the output parameter
-- (its default is declared with the other script parameters; override it
-- with -p output=PATH). A second, hard-coded STORE to a fixed HDFS path was
-- removed: it ignored the parameter and collided with this statement whenever
-- the parameter pointed at that same path.
STORE result INTO '$output';
