Commit 2348211b authored by Chris Partridge, committed by GitHub

Fixes Publishing Data to Elasticsearch (#994)

* Fix ES publishing

* Remove hour() from elasticsearch.py
parent 52ee7522
## TODO - Fix Weekday situation
from elasticsearch import Elasticsearch, helpers
from geopy.geocoders import Nominatim
from time import strftime, localtime
from datetime import datetime
import contextlib
import sys
......@@ -58,7 +58,7 @@ def createIndex(config, instance, **scope):
"properties": {
"id": {"type": "long"},
"conversation_id": {"type": "long"},
"created_at": {"type": "long"},
"created_at": {"type": "text"},
"date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
"timezone": {"type": "keyword"},
"place": {"type": "keyword"},
......@@ -193,25 +193,18 @@ def weekday(day):
return weekdays[day]
def hour(datetime):
    """Return the local-time hour of day for an epoch timestamp.

    Args:
        datetime: Seconds since the epoch (int or float), as accepted by
            ``time.localtime``. Note that the parameter name shadows the
            ``datetime`` class imported at module level; it is kept
            unchanged for backward compatibility with existing callers.

    Returns:
        The hour on a 24-hour clock as a zero-padded two-character string
        ("00" through "23"), expressed in the machine's local timezone.
    """
    # localtime() converts the epoch seconds to a struct_time in the local
    # timezone; "%H" then formats just the hour component.
    local_struct = localtime(datetime)
    return strftime("%H", local_struct)
def Tweet(Tweet, config):
global _index_tweet_status
global _is_near_def
weekdays = {
"Monday": 1,
"Tuesday": 2,
"Wednesday": 3,
"Thursday": 4,
"Friday": 5,
"Saturday": 6,
"Sunday": 7,
}
day = weekdays[strftime("%A", localtime(Tweet.datetime/1000))]
date_obj = datetime.strptime(Tweet.datetime, "%Y-%m-%d %H:%M:%S %Z")
actions = []
try:
retweet = Tweet.retweet
except AttributeError:
retweet = None
dt = f"{Tweet.datestamp} {Tweet.timestamp}"
j_data = {
......@@ -231,10 +224,10 @@ def Tweet(Tweet, config):
"user_id_str": Tweet.user_id_str,
"username": Tweet.username,
"name": Tweet.name,
"day": day,
"hour": hour(Tweet.datetime/1000),
"day": date_obj.weekday(),
"hour": date_obj.hour,
"link": Tweet.link,
"retweet": Tweet.retweet,
"retweet": retweet,
"essid": config.Essid,
"nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count),
......@@ -245,7 +238,7 @@ def Tweet(Tweet, config):
"near": config.Near
}
}
if Tweet.retweet:
if retweet is not None:
j_data["_source"].update({"user_rt_id": Tweet.user_rt_id})
j_data["_source"].update({"user_rt": Tweet.user_rt})
j_data["_source"].update({"retweet_id": Tweet.retweet_id})
......
import datetime, pandas as pd, warnings
from time import strftime, localtime
from .elasticsearch import hour
from twint.tweet import Tweet_formats
Tweets_df = None
......@@ -85,7 +84,7 @@ def update(object, config):
"username": Tweet.username,
"name": Tweet.name,
"day": day,
"hour": hour(datetime_ms/1000),
"hour": datetime.strptime("%H", localtime(datetime_ms/1000)),
"link": Tweet.link,
"urls": Tweet.urls,
"photos": Tweet.photos,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment