Commit b117e5bb authored by Filipp Fediakov, committed by andytnt

Parse whether a tweet contains a video

parent 5de78856
......@@ -26,6 +26,7 @@ PUT twinttweets
"nreplies": {"type": "integer"},
"nretweets": {"type": "integer"},
"quote_url": {"type": "text"},
"video": {"type": "integer"},
"search": {"type": "text"},
"near": {"type": "text"},
"geo_near": {"type": "geo_point"},
......
......@@ -75,6 +75,7 @@ def init(db):
urls text,
photos text,
quote_url text,
video integer,
time_update integer not null,
PRIMARY KEY (id)
);
......@@ -242,8 +243,9 @@ def tweets(conn, Tweet, config):
",".join(Tweet.urls),
",".join(Tweet.photos),
Tweet.quote_url,
Tweet.video,
time_ms)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
if config.Favorites:
query = 'INSERT INTO favorites VALUES(?,?)'
......
......@@ -80,6 +80,7 @@ def createIndex(config, instance, **scope):
"nreplies": {"type": "integer"},
"nretweets": {"type": "integer"},
"quote_url": {"type": "text"},
"video": {"type":"integer"},
"search": {"type": "text"},
"near": {"type": "text"},
"geo_near": {"type": "geo_point"},
......@@ -220,6 +221,7 @@ def Tweet(Tweet, config):
"nreplies": int(Tweet.replies_count),
"nretweets": int(Tweet.retweets_count),
"quote_url": Tweet.quote_url,
"video": Tweet.video,
"search": str(config.Search),
"near": config.Near
}
......
......@@ -21,7 +21,8 @@ def tweetData(t):
"hashtags": t.hashtags,
"link": t.link,
"retweet": t.retweet,
"quote_url": t.quote_url
"quote_url": t.quote_url,
"video": t.video
}
return data
......@@ -48,7 +49,8 @@ def tweetFieldnames():
"hashtags",
"link",
"retweet",
"quote_url"
"quote_url",
"video"
]
return fieldnames
......
......@@ -82,6 +82,7 @@ def Tweet(tw, location, config):
t.mentions = getMentions(tw)
t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.video = 1 if tw.find_all("div", "AdaptiveMedia-video") != [] else 0
t.tweet = getText(tw)
t.location = location
t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment