Commit 2348211b authored by Chris Partridge, committed by GitHub

Fixes Publishing Data to Elasticsearch (#994)

* Fix ES publishing

* Remove hour() from elasticsearch.py
parent 52ee7522
## TODO - Fix Weekday situation ## TODO - Fix Weekday situation
from elasticsearch import Elasticsearch, helpers from elasticsearch import Elasticsearch, helpers
from geopy.geocoders import Nominatim from geopy.geocoders import Nominatim
from time import strftime, localtime from datetime import datetime
import contextlib import contextlib
import sys import sys
...@@ -58,7 +58,7 @@ def createIndex(config, instance, **scope): ...@@ -58,7 +58,7 @@ def createIndex(config, instance, **scope):
"properties": { "properties": {
"id": {"type": "long"}, "id": {"type": "long"},
"conversation_id": {"type": "long"}, "conversation_id": {"type": "long"},
"created_at": {"type": "long"}, "created_at": {"type": "text"},
"date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}, "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
"timezone": {"type": "keyword"}, "timezone": {"type": "keyword"},
"place": {"type": "keyword"}, "place": {"type": "keyword"},
...@@ -193,25 +193,18 @@ def weekday(day): ...@@ -193,25 +193,18 @@ def weekday(day):
return weekdays[day] return weekdays[day]
def hour(datetime):
return strftime("%H", localtime(datetime))
def Tweet(Tweet, config): def Tweet(Tweet, config):
global _index_tweet_status global _index_tweet_status
global _is_near_def global _is_near_def
weekdays = { date_obj = datetime.strptime(Tweet.datetime, "%Y-%m-%d %H:%M:%S %Z")
"Monday": 1,
"Tuesday": 2,
"Wednesday": 3,
"Thursday": 4,
"Friday": 5,
"Saturday": 6,
"Sunday": 7,
}
day = weekdays[strftime("%A", localtime(Tweet.datetime/1000))]
actions = [] actions = []
try:
retweet = Tweet.retweet
except AttributeError:
retweet = None
dt = f"{Tweet.datestamp} {Tweet.timestamp}" dt = f"{Tweet.datestamp} {Tweet.timestamp}"
j_data = { j_data = {
...@@ -231,10 +224,10 @@ def Tweet(Tweet, config): ...@@ -231,10 +224,10 @@ def Tweet(Tweet, config):
"user_id_str": Tweet.user_id_str, "user_id_str": Tweet.user_id_str,
"username": Tweet.username, "username": Tweet.username,
"name": Tweet.name, "name": Tweet.name,
"day": day, "day": date_obj.weekday(),
"hour": hour(Tweet.datetime/1000), "hour": date_obj.hour,
"link": Tweet.link, "link": Tweet.link,
"retweet": Tweet.retweet, "retweet": retweet,
"essid": config.Essid, "essid": config.Essid,
"nlikes": int(Tweet.likes_count), "nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count), "nreplies": int(Tweet.replies_count),
...@@ -245,7 +238,7 @@ def Tweet(Tweet, config): ...@@ -245,7 +238,7 @@ def Tweet(Tweet, config):
"near": config.Near "near": config.Near
} }
} }
if Tweet.retweet: if retweet is not None:
j_data["_source"].update({"user_rt_id": Tweet.user_rt_id}) j_data["_source"].update({"user_rt_id": Tweet.user_rt_id})
j_data["_source"].update({"user_rt": Tweet.user_rt}) j_data["_source"].update({"user_rt": Tweet.user_rt})
j_data["_source"].update({"retweet_id": Tweet.retweet_id}) j_data["_source"].update({"retweet_id": Tweet.retweet_id})
......
import datetime, pandas as pd, warnings import datetime, pandas as pd, warnings
from time import strftime, localtime from time import strftime, localtime
from .elasticsearch import hour
from twint.tweet import Tweet_formats from twint.tweet import Tweet_formats
Tweets_df = None Tweets_df = None
...@@ -85,7 +84,7 @@ def update(object, config): ...@@ -85,7 +84,7 @@ def update(object, config):
"username": Tweet.username, "username": Tweet.username,
"name": Tweet.name, "name": Tweet.name,
"day": day, "day": day,
"hour": hour(datetime_ms/1000), "hour": datetime.strptime("%H", localtime(datetime_ms/1000)),
"link": Tweet.link, "link": Tweet.link,
"urls": Tweet.urls, "urls": Tweet.urls,
"photos": Tweet.photos, "photos": Tweet.photos,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment