Commit 59c96010 authored by John Morrow, committed by GitHub

Adding language to tweet metadata (#749)

* Added language to tweetData and tweetFieldnames

* Added language to Tweet object as t.lang

* Added language to Tweet()

* added language field to update()

* updated to include language
Co-authored-by: JMorrow <jomorrcode@outlook.com>
parent 40c6fd0d
......@@ -16,6 +16,7 @@ def Tweet(config, t):
output = output.replace("{photos}", ",".join(t.photos))
output = output.replace("{video}", str(t.video))
output = output.replace("{tweet}", t.tweet)
output = output.replace("{language}", t.lang)
output = output.replace("{hashtags}", ",".join(t.hashtags))
output = output.replace("{cashtags}", ",".join(t.cashtags))
output = output.replace("{replies}", t.replies_count)
......
......@@ -56,6 +56,7 @@ def init(db):
id integer not null,
id_str text not null,
tweet text default '',
language text default '',
conversation_id text not null,
created_at integer not null,
date text not null,
......@@ -244,6 +245,7 @@ def tweets(conn, Tweet, config):
entry = (Tweet.id,
Tweet.id_str,
Tweet.tweet,
Tweet.language,
Tweet.conversation_id,
Tweet.datetime,
Tweet.datestamp,
......@@ -272,7 +274,7 @@ def tweets(conn, Tweet, config):
Tweet.translate,
Tweet.trans_src,
Tweet.trans_dest)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
if config.Favorites:
query = 'INSERT INTO favorites VALUES(?,?)'
......
......@@ -64,6 +64,7 @@ def createIndex(config, instance, **scope):
"place": {"type": "keyword"},
"location": {"type": "keyword"},
"tweet": {"type": "text"},
"lang": {"type": "keyword"},
"hashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
"cashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
"user_id_str": {"type": "keyword"},
......@@ -223,6 +224,7 @@ def Tweet(Tweet, config):
"timezone": Tweet.timezone,
"place": Tweet.place,
"tweet": Tweet.tweet,
"language": Tweet.lang,
"hashtags": Tweet.hashtags,
"cashtags": Tweet.cashtags,
"user_id_str": Tweet.user_id_str,
......
......@@ -76,6 +76,7 @@ def update(object, config):
"timezone": Tweet.timezone,
"place": Tweet.place,
"tweet": Tweet.tweet,
"language": Tweet.lang,
"hashtags": Tweet.hashtags,
"cashtags": Tweet.cashtags,
"user_id": Tweet.user_id,
......
......@@ -11,6 +11,7 @@ def tweetData(t):
"name": t.name,
"place": t.place,
"tweet": t.tweet,
"language": t.lang,
"mentions": t.mentions,
"urls": t.urls,
"photos": t.photos,
......@@ -50,6 +51,7 @@ def tweetFieldnames():
"name",
"place",
"tweet",
"language",
"mentions",
"urls",
"photos",
......
......@@ -98,6 +98,7 @@ def Tweet(tw, config):
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
t.tweet = getText(tw)
t.lang = tw.find('p', 'tweet-text')['lang']
t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")]
t.replies_count = getStat(tw, "reply")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment