Commit da742ea5 authored by Francesco Poldi

Fixes plus pandas dev

parent 8030b30e
......@@ -103,6 +103,8 @@ def initialize(args):
c.Format = args.format
c.User_full = args.user_full
c.Profile_full = args.profile_full
c.Store_pandas = args.store_pandas
c.Pandas_type = args.pandas_type
return c
def options():
......@@ -150,6 +152,8 @@ def options():
ap.add_argument("--profile-full",
help="Slow, but effective method of collecting a user's Tweets (Including Retweets).",
action="store_true")
ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.")
ap.add_argument("--pandas-type", help="Specify HDF5 or Pickle (HDF5 as default)")
args = ap.parse_args()
return args
......@@ -160,6 +164,9 @@ def main():
if args.userlist:
args.username = loadUserList(args.userlist, "search")
if not args.pandas_type:
args.pandas_type = "HDF5"
c = initialize(args)
if args.favorites:
......
......@@ -35,3 +35,6 @@ class Config:
User_full = False
Profile_full = False
Store_object = False
Store_pandas = False
Pandas_type = None
Pandas = False
\ No newline at end of file
from elasticsearch import Elasticsearch, helpers
from sys import stdout
from time import strftime, localtime
import contextlib
import sys
class RecycleObject(object):
    """Write-only sink that throws away everything sent to it.

    Exposes ``write`` and ``flush`` so an instance can be used where a
    writable stream object is expected; both methods do nothing.
    """

    def write(self, junk):
        """Discard *junk*; nothing is stored or forwarded."""

    def flush(self):
        """Do nothing; there is never any buffered data to flush."""
@contextlib.contextmanager
def nostdout():
    """Silence ``sys.stdout`` for the duration of a ``with`` block.

    ``sys.stdout`` is replaced by a ``RecycleObject`` on entry and the
    previous stream is put back on exit, including when the body raises.
    """
    savestdout = sys.stdout
    # Rebind the attribute on the sys module itself: assigning to a bare
    # local name ``stdout`` would leave the real stream untouched.
    sys.stdout = RecycleObject()
    try:
        yield
    finally:
        # Always restore, otherwise an exception inside the block would
        # leave the process with its output permanently discarded.
        sys.stdout = savestdout
def weekdate(day):
def weekday(day):
weekdays = {
"Monday": 1,
"Tuesday": 2,
......@@ -43,7 +43,7 @@ def Tweet(Tweet, es, session):
j_data = {
"_index": "twint",
"_type": "items",
"_id": Tweet.id + "_raw_" + session,
"_id": Tweet.id + "_raw_" + str(session),
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -58,7 +58,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": session
"essid": str(session)
}
}
actions.append(j_data)
......@@ -67,7 +67,7 @@ def Tweet(Tweet, es, session):
j_data = {
"_index": "twint",
"_type": "items",
"_id": Tweet.id + "_likes_" + str(nLikes) + "_" + session,
"_id": Tweet.id + "_likes_" + str(nLikes) + "_" + str(session),
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -83,7 +83,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": session
"essid": str(session)
}
}
actions.append(j_data)
......@@ -93,7 +93,7 @@ def Tweet(Tweet, es, session):
j_data = {
"_index": "twint",
"_type": "items",
"_id": Tweet.id + "_replies_" + str(nReplies) + "_" + session,
"_id": Tweet.id + "_replies_" + str(nReplies) + "_" + str(session),
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -109,7 +109,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": session
"essid": str(session)
}
}
actions.append(j_data)
......@@ -119,7 +119,7 @@ def Tweet(Tweet, es, session):
j_data = {
"_index": "twint",
"_type": "items",
"_id": Tweet.id + "_retweets_" + str(nRetweets) + "_" + session,
"_id": Tweet.id + "_retweets_" + str(nRetweets) + "_" + str(session),
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -135,7 +135,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": session
"essid": str(session)
}
}
actions.append(j_data)
......@@ -152,11 +152,11 @@ def Follow(es, user, follow, session):
j_data = {
"_index": "twintGraph",
"_type": "items",
"_id": user + "_" + follow + "_" + session,
"_id": user + "_" + follow + "_" + str(session),
"_source": {
"user": user,
"follow": follow,
"essid": session
"essid": str(session)
}
}
actions.append(j_data)
......@@ -172,7 +172,7 @@ def UserProfile(es, user, follow, session):
j_data = {
"_index": "twintUser",
"_type": "items",
"_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + session,
"_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + str(session),
"_source": {
"id": user.id,
"name": user.name,
......@@ -191,7 +191,7 @@ def UserProfile(es, user, follow, session):
"private": user.is_private,
"verified": user.is_verified,
"avatar": user.avatar,
"session": session
"session": str(session)
}
}
actions.append(j_data)
......
from datetime import datetime
import Pandas
from . import db, elasticsearch, format, write
from .tweet import Tweet
from .user import User
......@@ -30,14 +31,17 @@ def _output(obj, output, config):
else:
write.Text(output, config.Output)
if config.Pandas:
Pandas.update(obj, config.Session)
if config.Elasticsearch:
if config.Store_object:
tweets_object.append(tweet)
tweets_object.append(obj)
else:
print(output, end=".", flush=True)
else:
if config.Store_object:
tweets_object.append(tweet)
tweets_object.append(obj)
else:
print(output)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment