Commit 9a756db3 authored by Francesco Poldi

Fixes plus pandas dev

parent 5f4afa27
...@@ -103,6 +103,8 @@ def initialize(args): ...@@ -103,6 +103,8 @@ def initialize(args):
c.Format = args.format c.Format = args.format
c.User_full = args.user_full c.User_full = args.user_full
c.Profile_full = args.profile_full c.Profile_full = args.profile_full
c.Store_pandas = args.store_pandas
c.Pandas_type = args.pandas_type
return c return c
def options(): def options():
...@@ -150,6 +152,8 @@ def options(): ...@@ -150,6 +152,8 @@ def options():
ap.add_argument("--profile-full", ap.add_argument("--profile-full",
help="Slow, but effective method of collecting a user's Tweets (Including Retweets).", help="Slow, but effective method of collecting a user's Tweets (Including Retweets).",
action="store_true") action="store_true")
ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.")
ap.add_argument("--pandas-type", help="Specify HDF5 or Pickle (HDF5 as default)")
args = ap.parse_args() args = ap.parse_args()
return args return args
...@@ -160,6 +164,9 @@ def main(): ...@@ -160,6 +164,9 @@ def main():
if args.userlist: if args.userlist:
args.username = loadUserList(args.userlist, "search") args.username = loadUserList(args.userlist, "search")
if not args.pandas_type:
args.pandas_type = "HDF5"
c = initialize(args) c = initialize(args)
if args.favorites: if args.favorites:
......
...@@ -35,3 +35,6 @@ class Config: ...@@ -35,3 +35,6 @@ class Config:
User_full = False User_full = False
Profile_full = False Profile_full = False
Store_object = False Store_object = False
Store_pandas = False
Pandas_type = None
Pandas = False
\ No newline at end of file
from elasticsearch import Elasticsearch, helpers from elasticsearch import Elasticsearch, helpers
from sys import stdout
from time import strftime, localtime from time import strftime, localtime
import contextlib import contextlib
import sys
class RecycleObject(object): class RecycleObject(object):
def write(self, junk): pass def write(self, junk): pass
def flush(slef): pass def flush(self): pass
@contextlib.contextmanager @contextlib.contextmanager
def nostdout(): def nostdout():
savestdout = stdout savestdout = sys.stdout
stdout = RecycleObject() stdout = RecycleObject()
yield yield
stdout = savestdout stdout = savestdout
def weekdate(day): def weekday(day):
weekdays = { weekdays = {
"Monday": 1, "Monday": 1,
"Tuesday": 2, "Tuesday": 2,
...@@ -43,7 +43,7 @@ def Tweet(Tweet, es, session): ...@@ -43,7 +43,7 @@ def Tweet(Tweet, es, session):
j_data = { j_data = {
"_index": "twint", "_index": "twint",
"_type": "items", "_type": "items",
"_id": Tweet.id + "_raw_" + session, "_id": Tweet.id + "_raw_" + str(session),
"_source": { "_source": {
"id": Tweet.id, "id": Tweet.id,
"date": dt, "date": dt,
...@@ -58,7 +58,7 @@ def Tweet(Tweet, es, session): ...@@ -58,7 +58,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link, "link": Tweet.link,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"user_rt": Tweet.user_rt, "user_rt": Tweet.user_rt,
"essid": session "essid": str(session)
} }
} }
actions.append(j_data) actions.append(j_data)
...@@ -67,7 +67,7 @@ def Tweet(Tweet, es, session): ...@@ -67,7 +67,7 @@ def Tweet(Tweet, es, session):
j_data = { j_data = {
"_index": "twint", "_index": "twint",
"_type": "items", "_type": "items",
"_id": Tweet.id + "_likes_" + str(nLikes) + "_" + session, "_id": Tweet.id + "_likes_" + str(nLikes) + "_" + str(session),
"_source": { "_source": {
"id": Tweet.id, "id": Tweet.id,
"date": dt, "date": dt,
...@@ -83,7 +83,7 @@ def Tweet(Tweet, es, session): ...@@ -83,7 +83,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link, "link": Tweet.link,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"user_rt": Tweet.user_rt, "user_rt": Tweet.user_rt,
"essid": session "essid": str(session)
} }
} }
actions.append(j_data) actions.append(j_data)
...@@ -93,7 +93,7 @@ def Tweet(Tweet, es, session): ...@@ -93,7 +93,7 @@ def Tweet(Tweet, es, session):
j_data = { j_data = {
"_index": "twint", "_index": "twint",
"_type": "items", "_type": "items",
"_id": Tweet.id + "_replies_" + str(nReplies) + "_" + session, "_id": Tweet.id + "_replies_" + str(nReplies) + "_" + str(session),
"_source": { "_source": {
"id": Tweet.id, "id": Tweet.id,
"date": dt, "date": dt,
...@@ -109,7 +109,7 @@ def Tweet(Tweet, es, session): ...@@ -109,7 +109,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link, "link": Tweet.link,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"user_rt": Tweet.user_rt, "user_rt": Tweet.user_rt,
"essid": session "essid": str(session)
} }
} }
actions.append(j_data) actions.append(j_data)
...@@ -119,7 +119,7 @@ def Tweet(Tweet, es, session): ...@@ -119,7 +119,7 @@ def Tweet(Tweet, es, session):
j_data = { j_data = {
"_index": "twint", "_index": "twint",
"_type": "items", "_type": "items",
"_id": Tweet.id + "_retweets_" + str(nRetweets) + "_" + session, "_id": Tweet.id + "_retweets_" + str(nRetweets) + "_" + str(session),
"_source": { "_source": {
"id": Tweet.id, "id": Tweet.id,
"date": dt, "date": dt,
...@@ -135,7 +135,7 @@ def Tweet(Tweet, es, session): ...@@ -135,7 +135,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link, "link": Tweet.link,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"user_rt": Tweet.user_rt, "user_rt": Tweet.user_rt,
"essid": session "essid": str(session)
} }
} }
actions.append(j_data) actions.append(j_data)
...@@ -152,11 +152,11 @@ def Follow(es, user, follow, session): ...@@ -152,11 +152,11 @@ def Follow(es, user, follow, session):
j_data = { j_data = {
"_index": "twintGraph", "_index": "twintGraph",
"_type": "items", "_type": "items",
"_id": user + "_" + follow + "_" + session, "_id": user + "_" + follow + "_" + str(session),
"_source": { "_source": {
"user": user, "user": user,
"follow": follow, "follow": follow,
"essid": session "essid": str(session)
} }
} }
actions.append(j_data) actions.append(j_data)
...@@ -172,7 +172,7 @@ def UserProfile(es, user, follow, session): ...@@ -172,7 +172,7 @@ def UserProfile(es, user, follow, session):
j_data = { j_data = {
"_index": "twintUser", "_index": "twintUser",
"_type": "items", "_type": "items",
"_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + session, "_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + str(session),
"_source": { "_source": {
"id": user.id, "id": user.id,
"name": user.name, "name": user.name,
...@@ -191,7 +191,7 @@ def UserProfile(es, user, follow, session): ...@@ -191,7 +191,7 @@ def UserProfile(es, user, follow, session):
"private": user.is_private, "private": user.is_private,
"verified": user.is_verified, "verified": user.is_verified,
"avatar": user.avatar, "avatar": user.avatar,
"session": session "session": str(session)
} }
} }
actions.append(j_data) actions.append(j_data)
......
from datetime import datetime from datetime import datetime
import Pandas
from . import db, elasticsearch, format, write from . import db, elasticsearch, format, write
from .tweet import Tweet from .tweet import Tweet
from .user import User from .user import User
...@@ -30,14 +31,17 @@ def _output(obj, output, config): ...@@ -30,14 +31,17 @@ def _output(obj, output, config):
else: else:
write.Text(output, config.Output) write.Text(output, config.Output)
if config.Pandas:
Pandas.update(obj, config.Session)
if config.Elasticsearch: if config.Elasticsearch:
if config.Store_object: if config.Store_object:
tweets_object.append(tweet) tweets_object.append(obj)
else: else:
print(output, end=".", flush=True) print(output, end=".", flush=True)
else: else:
if config.Store_object: if config.Store_object:
tweets_object.append(tweet) tweets_object.append(obj)
else: else:
print(output) print(output)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment