diff --git a/pig-twt-e1.pig b/pig-twt-e1.pig
new file mode 100644
index 0000000000000000000000000000000000000000..2335567784bfec24e49eac727a7a84b568c28d7b
--- /dev/null
+++ b/pig-twt-e1.pig
@@ -0,0 +1,13 @@
+dataset = LOAD '/laboratory/twitter-small.txt' AS (id: long, fr: long);
+
+-- TODO: check if user IDs are valid (e.g. not null) and clean the dataset
+B = FILTER dataset BY (id>0);
+
+-- TODO: organize data such that each node ID is associated to a list of neighbors
+C = FOREACH B GENERATE fr;
+
+D = GROUP C BY fr;
+
+friends = FOREACH D GENERATE group, COUNT(C);
+
+STORE friends INTO '/twitter/results-e1';
diff --git a/pig-twt-e1.pig~ b/pig-twt-e1.pig~
new file mode 100644
index 0000000000000000000000000000000000000000..c0a84371e46061ee02498af6a8a4990e7738a026
--- /dev/null
+++ b/pig-twt-e1.pig~
@@ -0,0 +1,13 @@
+dataset = LOAD '/laboratory/twitter-small.txt' AS (id: long, fr: long);
+
+-- TODO: check if user IDs are valid (e.g. not null) and clean the dataset
+B = FILTER dataset BY (id>0);
+
+-- TODO: organize data such that each node ID is associated to a list of neighbors
+C = FOREACH B GENERATE fr;
+
+D = GROUP C BY fr;
+
+friends = FOREACH D GENERATE group, COUNT(C);
+
+STORE friends INTO '/twitter/results1';
diff --git a/pig-twt-e2.pig b/pig-twt-e2.pig
new file mode 100644
index 0000000000000000000000000000000000000000..d59d1cc01ed0037dfb5f8ea03940165f13c082ed
--- /dev/null
+++ b/pig-twt-e2.pig
@@ -0,0 +1,15 @@
+dataset = LOAD '/laboratory/twitter-small.txt' AS (id: long, fr: long);
+
+-- TODO: check if user IDs are valid (e.g. not null) and clean the dataset
+B = FILTER dataset BY (id>0);
+
+-- TODO: organize data such that each node ID is associated to a list of neighbors
+C = FOREACH B GENERATE id;  -- keep only the id column
+
+D = GROUP C BY id;
+
+friends = FOREACH D GENERATE group, COUNT(C) AS count;  -- (id, COUNT over the rows grouped under it)
+
+friends2 = FILTER friends BY (count > 2);  -- keep only ids whose count exceeds 2
+
+STORE friends2 INTO '/twitter/results-e2';
diff --git a/pig-twt-e2.pig~ b/pig-twt-e2.pig~
new file mode 100644
index 0000000000000000000000000000000000000000..184fddcfd89cff06c604f6d08807861f1c4c7038
--- /dev/null
+++ b/pig-twt-e2.pig~
@@ -0,0 +1,15 @@
+dataset = LOAD '/laboratory/twitter-small.txt' AS (id: long, fr: long);
+
+-- TODO: check if user IDs are valid (e.g. not null) and clean the dataset
+B = FILTER dataset BY (id>0);
+
+-- TODO: organize data such that each node ID is associated to a list of neighbors
+C = FOREACH B GENERATE id;
+
+D = GROUP C BY id;
+
+friends = FOREACH D GENERATE group, COUNT(C) AS count;
+
+friends2 = FILTER friends BY (count > 2);
+
+STORE friends2 INTO '/twitter/results1';
diff --git a/pig-twt.pig b/pig-twt.pig
index 54c8b86f85aa721616f3aeefeab2627df1825a37..9af9fc4c8ebb3ffb6b50b06177a1d664509dbc0e 100644
--- a/pig-twt.pig
+++ b/pig-twt.pig
@@ -10,4 +10,4 @@ D = GROUP C BY id;
 
 friends = FOREACH D GENERATE group, COUNT(C);
 
-STORE friends INTO '/twitter/results';
+STORE friends INTO '/twitter/results1';
diff --git a/pig-twt.pig~ b/pig-twt.pig~
index cf8863c2669afcdbfadc1757ca2c698a0d373192..54c8b86f85aa721616f3aeefeab2627df1825a37 100644
--- a/pig-twt.pig~
+++ b/pig-twt.pig~
@@ -1,4 +1,4 @@
-dataset = LOAD 'tw.txt' AS (id: long, fr: long);
+dataset = LOAD '/laboratory/twitter-small.txt' AS (id: long, fr: long);
 
 -- TODO: check if user IDs are valid (e.g. not null) and clean the dataset
 B = FILTER dataset BY (id>0);
@@ -10,4 +10,4 @@ D = GROUP C BY id;
 
 friends = FOREACH D GENERATE group, COUNT(C);
 
-STORE friends INTO './local-output/OSN/twc/';
+STORE friends INTO '/twitter/results';