Commit b3b4a776, authored by Cody Zacharias, committed by GitHub

Update Spacing / Add Values

parent 63600485
import contextlib
import datetime
import sys
import time

from elasticsearch import Elasticsearch, helpers
class RecycleObject(object):
    """Minimal write-only sink that discards everything written to it.

    It provides just the ``write`` and ``flush`` methods, which is enough
    for it to be assigned to ``sys.stdout`` as a stand-in stream.
    """

    def write(self, junk):
        """Accept *junk* and drop it; nothing is stored or returned."""
        pass

    def flush(self):
        """Do nothing; there is no buffer to flush."""
        pass
@contextlib.contextmanager
def nostdout():
    """Context manager that silences ``sys.stdout`` for the ``with`` body.

    ``sys.stdout`` is swapped for a ``RecycleObject`` on entry and the
    previous stream is put back on exit.
    """
    savestdout = sys.stdout
    sys.stdout = RecycleObject()
    try:
        yield
    finally:
        # Restore even when the body raises; otherwise one failed call
        # would leave stdout muted for the rest of the process.
        sys.stdout = savestdout
def weekday(day):
    """Return the 1-based number of an English weekday name.

    Args:
        day: Full English weekday name, e.g. ``"Monday"``.

    Returns:
        An int from 1 (Monday) to 7 (Sunday).

    Raises:
        KeyError: If *day* is not one of the seven names below.
    """
    weekdays = {
        "Monday": 1,
        "Tuesday": 2,
        "Wednesday": 3,
        "Thursday": 4,
        "Friday": 5,
        "Saturday": 6,
        "Sunday": 7,
    }
    return weekdays[day]
def hour(datetime):
    """Return the local-time hour of a timestamp as a two-digit string.

    Args:
        datetime: Value accepted by ``time.localtime`` (seconds since the
            epoch). The name shadows the ``datetime`` module inside this
            function; it is kept so existing callers keep working.

    Returns:
        The ``"%H"`` field (``"00"``-``"23"``) in the machine's local
        timezone.
    """
    return time.strftime("%H", time.localtime(datetime))
def _tweet_action(tweet, session, day, hour_of_day, dt, id_tag, flag=None):
    """Build one bulk-index action describing *tweet*.

    Args:
        tweet: Object exposing the tweet attributes read below.
        session: String appended to the document id and stored as "essid".
        day: Weekday number stored in the "day" field.
        hour_of_day: Value stored in the "hour" field.
        dt: Value stored in the "date" field.
        id_tag: Middle part of the document id, placed between the tweet
            id and *session* (e.g. ``"_raw_"`` or ``"_likes_0_"``).
        flag: Optional name of a marker field ("likes", "replies" or
            "retweets") that is set to ``True`` in the document.

    Returns:
        A dict with "_index", "_type", "_id" and "_source" keys.
    """
    source = {
        "id": tweet.id,
        "date": dt,
        "timezone": tweet.timezone,
        "location": tweet.location,
        "tweet": tweet.tweet,
        "hashtags": tweet.hashtags,
    }
    if flag is not None:
        # Inserted here so the marker keeps its position right after
        # "hashtags", as in the original hand-written documents.
        source[flag] = True
    source.update({
        "user_id": tweet.user_id,
        "username": tweet.username,
        "day": day,
        "hour": hour_of_day,
        "link": tweet.link,
        "retweet": tweet.is_retweet,
        "user_rt": tweet.user_rt,
        "essid": session,
    })
    return {
        "_index": "twint",
        "_type": "items",
        "_id": tweet.id + id_tag + session,
        "_source": source,
    }


def Tweet(Tweet, es, session):
    """Bulk-index a tweet into the "twint" Elasticsearch index.

    One "raw" document is written for the tweet itself, followed by one
    document per like, per reply and per retweet (``int(Tweet.likes)``,
    ``int(Tweet.replies)``, ``int(Tweet.retweets)`` documents), each
    carrying a ``True`` marker field named after its kind.

    Args:
        Tweet: Object exposing the tweet attributes read here and in
            ``_tweet_action``. ``Tweet.id`` is concatenated with strings,
            so it must be a ``str``.
        es: Passed unchanged to the ``Elasticsearch`` constructor
            (presumably the host/connection string; confirm with callers).
        session: String used in every document id and stored as "essid".
    """
    # NOTE(review): "%A" yields the locale's weekday name, while weekday()
    # only knows the English names; this assumes an English/C locale.
    day = weekday(time.strftime("%A", time.localtime(Tweet.datetime)))
    # The hour and date string are the same for every document; compute once.
    hour_of_day = hour(Tweet.datetime)
    dt = "{} {}".format(Tweet.datestamp, Tweet.timestamp)

    actions = [_tweet_action(Tweet, session, day, hour_of_day, dt, "_raw_")]

    for flag, count in (
        ("likes", Tweet.likes),
        ("replies", Tweet.replies),
        ("retweets", Tweet.retweets),
    ):
        for n in range(int(count)):
            id_tag = "_{}_{}_".format(flag, n)
            actions.append(
                _tweet_action(
                    Tweet, session, day, hour_of_day, dt, id_tag, flag
                )
            )

    client = Elasticsearch(es)
    # The bulk call runs with stdout muted.
    with nostdout():
        helpers.bulk(client, actions, chunk_size=2000, request_timeout=200)
def Follow(es, user, follow, session):
    """Bulk-index one user/follow pair into the "twintgraph2" index.

    Args:
        es: Passed unchanged to the ``Elasticsearch`` constructor
            (presumably the host/connection string; confirm with callers).
        user: String stored in the "user" field and used in the id.
        follow: String stored in the "follow" field and used in the id.
        session: String used in the document id and stored as "essid".
    """
    action = {
        "_index": "twintgraph2",
        "_type": "items",
        "_id": user + "_" + follow + "_" + session,
        "_source": {
            "user": user,
            "follow": follow,
            "essid": session,
        },
    }

    client = Elasticsearch(es)
    # The bulk call runs with stdout muted.
    with nostdout():
        helpers.bulk(client, [action], chunk_size=2000, request_timeout=200)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment