Commit 59c96010, authored by John Morrow, committed by GitHub

Adding language to tweet metadata (#749)

* Added language to tweetData and tweetFieldnames

* Added language to Tweet object as t.lang

* Added language to Tweet()

* Added language field to update()

* Updated to include language

Co-authored-by: JMorrow <jomorrcode@outlook.com>
parent 40c6fd0d
...@@ -16,6 +16,7 @@ def Tweet(config, t): ...@@ -16,6 +16,7 @@ def Tweet(config, t):
output = output.replace("{photos}", ",".join(t.photos)) output = output.replace("{photos}", ",".join(t.photos))
output = output.replace("{video}", str(t.video)) output = output.replace("{video}", str(t.video))
output = output.replace("{tweet}", t.tweet) output = output.replace("{tweet}", t.tweet)
output = output.replace("{language}", t.lang)
output = output.replace("{hashtags}", ",".join(t.hashtags)) output = output.replace("{hashtags}", ",".join(t.hashtags))
output = output.replace("{cashtags}", ",".join(t.cashtags)) output = output.replace("{cashtags}", ",".join(t.cashtags))
output = output.replace("{replies}", t.replies_count) output = output.replace("{replies}", t.replies_count)
......
...@@ -56,6 +56,7 @@ def init(db): ...@@ -56,6 +56,7 @@ def init(db):
id integer not null, id integer not null,
id_str text not null, id_str text not null,
tweet text default '', tweet text default '',
language text default '',
conversation_id text not null, conversation_id text not null,
created_at integer not null, created_at integer not null,
date text not null, date text not null,
...@@ -244,6 +245,7 @@ def tweets(conn, Tweet, config): ...@@ -244,6 +245,7 @@ def tweets(conn, Tweet, config):
entry = (Tweet.id, entry = (Tweet.id,
Tweet.id_str, Tweet.id_str,
Tweet.tweet, Tweet.tweet,
Tweet.language,
Tweet.conversation_id, Tweet.conversation_id,
Tweet.datetime, Tweet.datetime,
Tweet.datestamp, Tweet.datestamp,
...@@ -272,7 +274,7 @@ def tweets(conn, Tweet, config): ...@@ -272,7 +274,7 @@ def tweets(conn, Tweet, config):
Tweet.translate, Tweet.translate,
Tweet.trans_src, Tweet.trans_src,
Tweet.trans_dest) Tweet.trans_dest)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry) cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
if config.Favorites: if config.Favorites:
query = 'INSERT INTO favorites VALUES(?,?)' query = 'INSERT INTO favorites VALUES(?,?)'
......
...@@ -64,6 +64,7 @@ def createIndex(config, instance, **scope): ...@@ -64,6 +64,7 @@ def createIndex(config, instance, **scope):
"place": {"type": "keyword"}, "place": {"type": "keyword"},
"location": {"type": "keyword"}, "location": {"type": "keyword"},
"tweet": {"type": "text"}, "tweet": {"type": "text"},
"lang": {"type": "keyword"},
"hashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"}, "hashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
"cashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"}, "cashtags": {"type": "keyword", "normalizer": "hashtag_normalizer"},
"user_id_str": {"type": "keyword"}, "user_id_str": {"type": "keyword"},
...@@ -223,6 +224,7 @@ def Tweet(Tweet, config): ...@@ -223,6 +224,7 @@ def Tweet(Tweet, config):
"timezone": Tweet.timezone, "timezone": Tweet.timezone,
"place": Tweet.place, "place": Tweet.place,
"tweet": Tweet.tweet, "tweet": Tweet.tweet,
"language": Tweet.lang,
"hashtags": Tweet.hashtags, "hashtags": Tweet.hashtags,
"cashtags": Tweet.cashtags, "cashtags": Tweet.cashtags,
"user_id_str": Tweet.user_id_str, "user_id_str": Tweet.user_id_str,
......
...@@ -76,6 +76,7 @@ def update(object, config): ...@@ -76,6 +76,7 @@ def update(object, config):
"timezone": Tweet.timezone, "timezone": Tweet.timezone,
"place": Tweet.place, "place": Tweet.place,
"tweet": Tweet.tweet, "tweet": Tweet.tweet,
"language": Tweet.lang,
"hashtags": Tweet.hashtags, "hashtags": Tweet.hashtags,
"cashtags": Tweet.cashtags, "cashtags": Tweet.cashtags,
"user_id": Tweet.user_id, "user_id": Tweet.user_id,
......
...@@ -11,6 +11,7 @@ def tweetData(t): ...@@ -11,6 +11,7 @@ def tweetData(t):
"name": t.name, "name": t.name,
"place": t.place, "place": t.place,
"tweet": t.tweet, "tweet": t.tweet,
"language": t.lang,
"mentions": t.mentions, "mentions": t.mentions,
"urls": t.urls, "urls": t.urls,
"photos": t.photos, "photos": t.photos,
...@@ -50,6 +51,7 @@ def tweetFieldnames(): ...@@ -50,6 +51,7 @@ def tweetFieldnames():
"name", "name",
"place", "place",
"tweet", "tweet",
"language",
"mentions", "mentions",
"urls", "urls",
"photos", "photos",
......
...@@ -98,6 +98,7 @@ def Tweet(tw, config): ...@@ -98,6 +98,7 @@ def Tweet(tw, config):
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")] t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0 t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
t.tweet = getText(tw) t.tweet = getText(tw)
t.lang = tw.find('p', 'tweet-text')['lang']
t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")] t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")] t.cashtags = [cashtag.text for cashtag in tw.find_all("a", "twitter-cashtag")]
t.replies_count = getStat(tw, "reply") t.replies_count = getStat(tw, "reply")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment