Commit 87b5b663, authored by Mustafa Shakir and committed by GitHub

Add thumbnail attribute to tweet object (#889)

* Add thumbnail attribute to tweet object

Fetch the video thumbnail of a tweet

* Add Thumbnail support for DB, CSV, JSON and Elasticsearch

* Update panda.py

Add the missing data fields: video, photos, urls and thumbnail.
parent 446b4097
...@@ -28,6 +28,7 @@ PUT twinttweets ...@@ -28,6 +28,7 @@ PUT twinttweets
"nretweets": {"type": "integer"}, "nretweets": {"type": "integer"},
"quote_url": {"type": "text"}, "quote_url": {"type": "text"},
"video": {"type": "integer"}, "video": {"type": "integer"},
"thumbnail": {"type": "text"},
"search": {"type": "text"}, "search": {"type": "text"},
"near": {"type": "text"}, "near": {"type": "text"},
"geo_near": {"type": "geo_point"}, "geo_near": {"type": "geo_point"},
......
...@@ -15,6 +15,7 @@ def Tweet(config, t): ...@@ -15,6 +15,7 @@ def Tweet(config, t):
output = output.replace("{urls}", ",".join(t.urls)) output = output.replace("{urls}", ",".join(t.urls))
output = output.replace("{photos}", ",".join(t.photos)) output = output.replace("{photos}", ",".join(t.photos))
output = output.replace("{video}", str(t.video)) output = output.replace("{video}", str(t.video))
output = output.replace("{thumbnail}", t.thumbnail)
output = output.replace("{tweet}", t.tweet) output = output.replace("{tweet}", t.tweet)
output = output.replace("{language}", t.lang) output = output.replace("{language}", t.lang)
output = output.replace("{hashtags}", ",".join(t.hashtags)) output = output.replace("{hashtags}", ",".join(t.hashtags))
......
...@@ -76,6 +76,7 @@ def init(db): ...@@ -76,6 +76,7 @@ def init(db):
cashtags text, cashtags text,
urls text, urls text,
photos text, photos text,
thumbnail text,
quote_url text, quote_url text,
video integer, video integer,
geo text, geo text,
...@@ -265,6 +266,7 @@ def tweets(conn, Tweet, config): ...@@ -265,6 +266,7 @@ def tweets(conn, Tweet, config):
",".join(Tweet.cashtags), ",".join(Tweet.cashtags),
",".join(Tweet.urls), ",".join(Tweet.urls),
",".join(Tweet.photos), ",".join(Tweet.photos),
Tweet.thumbnail,
Tweet.quote_url, Tweet.quote_url,
Tweet.video, Tweet.video,
Tweet.geo, Tweet.geo,
...@@ -274,7 +276,7 @@ def tweets(conn, Tweet, config): ...@@ -274,7 +276,7 @@ def tweets(conn, Tweet, config):
Tweet.translate, Tweet.translate,
Tweet.trans_src, Tweet.trans_src,
Tweet.trans_dest) Tweet.trans_dest)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry) cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
if config.Favorites: if config.Favorites:
query = 'INSERT INTO favorites VALUES(?,?)' query = 'INSERT INTO favorites VALUES(?,?)'
......
...@@ -81,6 +81,7 @@ def createIndex(config, instance, **scope): ...@@ -81,6 +81,7 @@ def createIndex(config, instance, **scope):
"nretweets": {"type": "integer"}, "nretweets": {"type": "integer"},
"quote_url": {"type": "text"}, "quote_url": {"type": "text"},
"video": {"type":"integer"}, "video": {"type":"integer"},
"thumbnail": {"type":"text"},
"search": {"type": "text"}, "search": {"type": "text"},
"near": {"type": "text"}, "near": {"type": "text"},
"geo_near": {"type": "geo_point"}, "geo_near": {"type": "geo_point"},
...@@ -256,6 +257,8 @@ def Tweet(Tweet, config): ...@@ -256,6 +257,8 @@ def Tweet(Tweet, config):
for photo in Tweet.photos: for photo in Tweet.photos:
_photos.append(photo) _photos.append(photo)
j_data["_source"].update({"photos": _photos}) j_data["_source"].update({"photos": _photos})
if Tweet.thumbnail:
j_data["_source"].update({"thumbnail": Tweet.thumbnail})
if Tweet.mentions: if Tweet.mentions:
_mentions = [] _mentions = []
for mention in Tweet.mentions: for mention in Tweet.mentions:
......
...@@ -86,6 +86,10 @@ def update(object, config): ...@@ -86,6 +86,10 @@ def update(object, config):
"day": day, "day": day,
"hour": hour(Tweet.datetime/1000), "hour": hour(Tweet.datetime/1000),
"link": Tweet.link, "link": Tweet.link,
"urls": Tweet.urls,
"photos": Tweet.photos,
"video": Tweet.video,
"thumbnail": Tweet.thumbnail,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"nlikes": int(Tweet.likes_count), "nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count), "nreplies": int(Tweet.replies_count),
......
...@@ -24,6 +24,7 @@ def tweetData(t): ...@@ -24,6 +24,7 @@ def tweetData(t):
"retweet": t.retweet, "retweet": t.retweet,
"quote_url": t.quote_url, "quote_url": t.quote_url,
"video": t.video, "video": t.video,
"thumbnail": t.thumbnail,
"near": t.near, "near": t.near,
"geo": t.geo, "geo": t.geo,
"source": t.source, "source": t.source,
...@@ -64,6 +65,7 @@ def tweetFieldnames(): ...@@ -64,6 +65,7 @@ def tweetFieldnames():
"retweet", "retweet",
"quote_url", "quote_url",
"video", "video",
"thumbnail",
"near", "near",
"geo", "geo",
"source", "source",
......
...@@ -74,6 +74,16 @@ def getRetweet(tw, _config): ...@@ -74,6 +74,16 @@ def getRetweet(tw, _config):
return _rt_id, _rt_username return _rt_id, _rt_username
return '', '' return '', ''
def getThumbnail(tw):
    """Get Thumbnail

    Extract the thumbnail URL embedded in the inline CSS ``style`` attribute
    of the tweet's ``div`` elements carrying the ``PlayableMedia-player``
    class (e.g. ``background-image:url('https://...')``).

    :param tw: parsed tweet node exposing ``find_all(name, css_class)`` whose
        results expose an ``attrs`` mapping.
    :return: the thumbnail URL as a string, or ``""`` when the tweet has no
        player div or none of them carries a ``url(...)`` in its style.
        When several player divs carry a URL, the last one wins.
    """
    # Function-scope import so the block stays self-contained.
    import re

    thumb = ""
    for div in tw.find_all("div", "PlayableMedia-player"):
        # A player div may lack an inline style; treat that as "no thumbnail"
        # instead of raising KeyError.
        style = div.attrs.get("style") or ""
        # Accept url('...'), url("...") and bare url(...); capture only the
        # URL so trailing CSS declarations never leak into the result.
        urls = re.findall(r"url\(\s*(['\"]?)(.*?)\1\s*\)", style)
        if urls:
            # Keep the last url(...) of the style string.
            thumb = urls[-1][1]
    return thumb
def Tweet(tw, config): def Tweet(tw, config):
"""Create Tweet object """Create Tweet object
""" """
...@@ -97,6 +107,7 @@ def Tweet(tw, config): ...@@ -97,6 +107,7 @@ def Tweet(tw, config):
t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")] t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")] t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0 t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
t.thumbnail = getThumbnail(tw)
t.tweet = getText(tw) t.tweet = getText(tw)
t.lang = tw.find('p', 'tweet-text')['lang'] t.lang = tw.find('p', 'tweet-text')['lang']
t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")] t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment