Commit b22ca858 authored by Francesco Poldi's avatar Francesco Poldi

Merge branch 'master' into dev

parents ecfe44bc 43af4cba
......@@ -112,7 +112,10 @@ def initialize(args):
c.Profile_full = args.profile_full
c.Store_pandas = args.store_pandas
c.Pandas_type = args.pandas_type
c.search_name = args.search_name
c.Search_name = args.search_name
c.Index_tweets = args.index_tweets
c.Index_follow = args.index_follow
c.Index_users = args.index_users
return c
def options():
......@@ -165,7 +168,10 @@ def options():
action="store_true")
ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.")
ap.add_argument("--pandas-type", help="Specify HDF5 or Pickle (HDF5 as default)")
ap.add_argument("--search_name", help="name for identify the search like -3dprinter stuff- only for mysql")
ap.add_argument("--search_name", help="Name for identify the search like -3dprinter stuff- only for mysql")
ap.add_argument("-it", "--index-tweets", help="Custom Elasticsearch Index name for Tweets.")
ap.add_argument("-if", "--index-follow", help="Custom Elasticsearch Index name for Follows.")
ap.add_argument("-iu", "--index-users", help="Custom Elasticsearch Index name for Users.")
args = ap.parse_args()
return args
......@@ -180,6 +186,15 @@ def main():
if not args.pandas_type:
args.pandas_type = "HDF5"
if not args.index_tweets:
args.index_tweets = "twint"
if not args.index_follow:
args.index_follow = "twintGraph"
if not args.index_users:
args.index_users = "twintUser"
c = initialize(args)
if args.favorites:
......
......@@ -41,4 +41,7 @@ class Config:
Store_pandas = False
Pandas_type = None
Pandas = False
search_name = "-" #for identify a records in mysql with the search it provides from. it cannot be null for DB requirements. a tweet must be in several search so the PK are tweet ID and search_name
Search_name = "-" #for identify a records in mysql with the search it provides from. it cannot be null for DB requirements. a tweet must be in several search so the PK are tweet ID and search_name
Index_tweets = None
Index_follow = None
Index_users = None
......@@ -30,7 +30,7 @@ def weekday(day):
def hour(datetime):
return strftime("%H", localtime(datetime))
def Tweet(Tweet, es, session):
def Tweet(Tweet, config):
day = weekday(strftime("%A", localtime(Tweet.datetime)))
actions = []
......@@ -41,9 +41,9 @@ def Tweet(Tweet, es, session):
dt = "{} {}".format(Tweet.datestamp, Tweet.timestamp)
j_data = {
"_index": "twint",
"_index": config.Index_tweets,
"_type": "items",
"_id": Tweet.id + "_raw_" + str(session),
"_id": Tweet.id + "_raw_" + config.Essid,
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -58,16 +58,16 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": str(session)
"essid": config.Essid
}
}
actions.append(j_data)
for l in range(int(Tweet.likes)):
j_data = {
"_index": "twint",
"_index": config.Index_tweets,
"_type": "items",
"_id": Tweet.id + "_likes_" + str(nLikes) + "_" + str(session),
"_id": Tweet.id + "_likes_" + str(nLikes) + "_" + config.Essid,
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -83,7 +83,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": str(session)
"essid": config.Essid
}
}
actions.append(j_data)
......@@ -91,9 +91,9 @@ def Tweet(Tweet, es, session):
for rep in range(int(Tweet.replies)):
j_data = {
"_index": "twint",
"_index": config.Index_tweets,
"_type": "items",
"_id": Tweet.id + "_replies_" + str(nReplies) + "_" + str(session),
"_id": Tweet.id + "_replies_" + str(nReplies) + "_" + config.Essid,
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -109,7 +109,7 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": str(session)
"essid": config.Essid
}
}
actions.append(j_data)
......@@ -117,9 +117,9 @@ def Tweet(Tweet, es, session):
for ret in range(int(Tweet.retweets)):
j_data = {
"_index": "twint",
"_index": config.Index_tweets,
"_type": "items",
"_id": Tweet.id + "_retweets_" + str(nRetweets) + "_" + str(session),
"_id": Tweet.id + "_retweets_" + str(nRetweets) + "_" + config.Essid,
"_source": {
"id": Tweet.id,
"date": dt,
......@@ -135,44 +135,44 @@ def Tweet(Tweet, es, session):
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": str(session)
"essid": config.Essid
}
}
actions.append(j_data)
nRetweets += 1
es = Elasticsearch(es)
es = Elasticsearch(config.Elasticsearch)
with nostdout():
helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
actions = []
def Follow(es, user, follow, session):
def Follow(user, config):
actions = []
j_data = {
"_index": "twintGraph",
"_index": config.Index_follow,
"_type": "items",
"_id": user + "_" + follow + "_" + str(session),
"_id": user + "_" + config.Username + "_" + config.Essid,
"_source": {
"user": user,
"follow": follow,
"essid": str(session)
"follow": config.Username,
"essid": config.Essid
}
}
actions.append(j_data)
es = Elasticsearch(es)
es = Elasticsearch(config.Elasticsearch)
with nostdout():
helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
actions = []
def UserProfile(es, user, follow, session):
def UserProfile(user, config):
actions = []
j_data = {
"_index": "twintUser",
"_index": config.Index_users,
"_type": "items",
"_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + str(session),
"_id": user.id + "_" + user.join_date + "_" + user.join_time + "_" + config.Essid,
"_source": {
"id": user.id,
"name": user.name,
......@@ -191,12 +191,12 @@ def UserProfile(es, user, follow, session):
"private": user.is_private,
"verified": user.is_verified,
"avatar": user.avatar,
"session": str(session)
"session": config.Essid
}
}
actions.append(j_data)
es = Elasticsearch(es)
es = Elasticsearch(config.Elasticsearch)
with nostdout():
helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
actions = []
......@@ -56,7 +56,7 @@ async def Tweets(tw, location, config, conn):
elif config.Database:
db.tweets(conn, tweet, config)
if config.Elasticsearch:
elasticsearch.Tweet(tweet, config.Elasticsearch, config.Essid)
elasticsearch.Tweet(tweet, config)
_output(tweet, output, config)
async def Users(u, config, conn):
......@@ -73,8 +73,7 @@ async def Users(u, config, conn):
_save_time = user.join_time
user.join_date = str(datetime.strptime(user.join_date, "%d %b %Y")).split()[0]
user.join_time = str(datetime.strptime(user.join_time, "%I:%M %p")).split()[1]
elasticsearch.UserProfile(config.Elasticsearch, user,
config.Username, config.Essid)
elasticsearch.UserProfile(user, config)
user.join_date = _save_date
user.join_time = _save_time
......@@ -87,7 +86,6 @@ async def Username(username, config, conn):
db.follow(conn, config.Username, config.Followers, username)
if config.Elasticsearch:
elasticsearch.Follow(config.Elasticsearch, username,
config.Username, config.Essid)
elasticsearch.Follow(username, config)
_output(username, username, config)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment