Commit 41462277 authored by Francesco Poldi

Revert "Merge remote-tracking branch 'origin/master'"

This reverts commit 4c27b2b82cb84e7b8d5fac206b0b8502a5faba4e.
parent 342a6955
...@@ -4,11 +4,8 @@ PUT twinttweets ...@@ -4,11 +4,8 @@ PUT twinttweets
"items": { "items": {
"properties": { "properties": {
"id": {"type": "long"}, "id": {"type": "long"},
"conversation_id": {"type": "text"},
"created_at": {"type":"text"},
"date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}, "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
"timezone": {"type": "text"}, "timezone": {"type": "text"},
"place": {"type": "text"},
"location": {"type": "text"}, "location": {"type": "text"},
"hashtags": {"type": "text"}, "hashtags": {"type": "text"},
"tweet": {"type": "text"}, "tweet": {"type": "text"},
...@@ -16,28 +13,16 @@ PUT twinttweets ...@@ -16,28 +13,16 @@ PUT twinttweets
"retweets": {"type": "boolean"}, "retweets": {"type": "boolean"},
"likes": {"type": "boolean"}, "likes": {"type": "boolean"},
"user_id": {"type": "keyword"}, "user_id": {"type": "keyword"},
"user_id_str": {"type":"text"},
"username": {"type": "keyword"}, "username": {"type": "keyword"},
"name": {"type":"text"},
"profile_image_url": {"type":"text"},
"day": {"type": "integer"}, "day": {"type": "integer"},
"hour": {"type": "integer"}, "hour": {"type": "integer"},
"link": {"type": "text"}, "link": {"type": "text"},
"gif_url": {"type": "text"},
"gif_thumb": {"type": "text"},
"video_url": {"type": "text"},
"video_thumb": {"type": "text"},
"is_reply_to": {"type": "integer"},
"has_parent_tweet": {"type": "integer"},
"retweet": {"type": "text"}, "retweet": {"type": "text"},
"user_rt": {"type": "text"},
"essid": {"type": "keyword"}, "essid": {"type": "keyword"},
"nlikes": {"type": "integer"}, "nlikes": {"type": "integer"},
"nreplies": {"type": "integer"}, "nreplies": {"type": "integer"},
"nretweets": {"type": "integer"}, "nretweets": {"type": "integer"},
"is_quote_status": {"type": "integer"},
"quote_id": {"type": "long"},
"quote_id_str": {"type":"text"},
"quote_url": {"type":"text"},
"search": {"type": "text"} "search": {"type": "text"}
} }
} }
......
...@@ -17,10 +17,9 @@ PUT twintuser ...@@ -17,10 +17,9 @@ PUT twintuser
"followers": {"type": "integer"}, "followers": {"type": "integer"},
"likes": {"type": "integer"}, "likes": {"type": "integer"},
"media": {"type": "integer"}, "media": {"type": "integer"},
"private": {"type": "integer"}, "private": {"type": "boolean"},
"verified": {"type": "integer"}, "verified": {"type": "boolean"},
"avatar": {"type": "text"}, "avatar": {"type": "text"},
"background_image": {"type": "text"},
"session": {"type": "keyword"} "session": {"type": "keyword"}
} }
} }
......
...@@ -10,7 +10,6 @@ from aiohttp_socks import SocksConnector, SocksVer ...@@ -10,7 +10,6 @@ from aiohttp_socks import SocksConnector, SocksVer
from . import url from . import url
from .output import Tweets, Users from .output import Tweets, Users
from .user import inf
#import logging #import logging
...@@ -103,14 +102,6 @@ async def Username(_id): ...@@ -103,14 +102,6 @@ async def Username(_id):
return soup.find("a", "fn url alternate-context")["href"].replace("/", "") return soup.find("a", "fn url alternate-context")["href"].replace("/", "")
async def UserId(username):
#loggin.info("[<] " + str(datetime.now()) + ':: get+UserId')
url = f"http://twitter.com/{username}?lang=en"
r = await Request(url)
soup = BeautifulSoup(r, "html.parser")
return int(inf(soup, "id"))
async def Tweet(url, config, conn): async def Tweet(url, config, conn):
#loggin.info("[<] " + str(datetime.now()) + ':: Tweet') #loggin.info("[<] " + str(datetime.now()) + ':: Tweet')
try: try:
......
from . import format, get from . import format
from .tweet import Tweet from .tweet import Tweet
from .user import User from .user import User
from datetime import datetime from datetime import datetime
...@@ -78,31 +78,11 @@ def _output(obj, output, config, **extra): ...@@ -78,31 +78,11 @@ def _output(obj, output, config, **extra):
except UnicodeEncodeError: except UnicodeEncodeError:
print("unicode error [x] output._output") print("unicode error [x] output._output")
async def tweetUserData(tweet,config, conn):
user_ids = set()
usernames = []
for user in tweet.mentions:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
for user in tweet.tags:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
for user in tweet.replies:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
for user in usernames:
url = f"http://twitter.com/{user}?lang=en"
await get.User(url, config, conn)
async def Tweets(tw, location, config, conn): async def Tweets(tw, location, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: output+Tweets') #logging.info("[<] " + str(datetime.now()) + ':: output+Tweets')
copyright = tw.find("div", "StreamItemContent--withheld") copyright = tw.find("div", "StreamItemContent--withheld")
if copyright is None and is_tweet(tw): if copyright is None and is_tweet(tw):
tweet = Tweet(tw, location, config) tweet = Tweet(tw, location, config)
await tweetUserData(tweet, config, conn)
if datecheck(tweet.datestamp, config): if datecheck(tweet.datestamp, config):
output = format.Tweet(config, tweet) output = format.Tweet(config, tweet)
...@@ -128,8 +108,7 @@ async def Users(u, config, conn): ...@@ -128,8 +108,7 @@ async def Users(u, config, conn):
output = format.User(config.Format, user) output = format.User(config.Format, user)
if config.Database: if config.Database:
#db.user(conn, config.Username, config.Followers, user) db.user(conn, config.Username, config.Followers, user)
db.user(conn, config, user)
if config.Elasticsearch: if config.Elasticsearch:
_save_date = user.join_date _save_date = user.join_date
......
...@@ -94,9 +94,6 @@ class Twint: ...@@ -94,9 +94,6 @@ class Twint:
if self.config.User_id is not None: if self.config.User_id is not None:
self.config.Username = await get.Username(self.config.User_id) self.config.Username = await get.Username(self.config.User_id)
if self.config.Username is not None:
self.config.User_id = await get.UserId(self.config.Username)
if self.config.TwitterSearch and self.config.Since and self.config.Until: if self.config.TwitterSearch and self.config.Since and self.config.Until:
_days = timedelta(days=int(self.config.Timedelta)) _days = timedelta(days=int(self.config.Timedelta))
while self.d._since < self.d._until: while self.d._since < self.d._until:
......
...@@ -18,181 +18,76 @@ def init(db): ...@@ -18,181 +18,76 @@ def init(db):
try: try:
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
cursor = conn.cursor() cursor = conn.cursor()
table_users = """
CREATE TABLE IF NOT EXISTS
users(
id integer not null,
id_str text not null,
name text,
username text not null,
bio text,
location text,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private integer not null,
verified integer not null,
profile_image_url text not null,
background_image text,
date_update text not null,
CONSTRAINT users_pk PRIMARY KEY (id)
);
"""
cursor.execute(table_users)
table_tweets = """ table_tweets = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
tweets ( tweets (
id integer not null, id integer not null,
id_str text not null, user_id integer,
tweet text default '',
conversation_id text not null,
created_at integer not null,
date text not null, date text not null,
time text not null, time text not null,
timezone text not null, timezone text not null,
place text default '',
location text not null, location text not null,
replies_count integer, user text not null,
likes_count integer, tweet text not null,
retweets_count integer, replies integer,
user_id integer not null, likes integer,
user_id_str text not null, retweets integer,
screen_name text not null, hashtags text,
name text default '',
profile_image_url text,
link text, link text,
gif_url text, retweet bool,
gif_thumb text, user_rt text,
video_url text, mentions text,
video_thumb text,
is_reply_to integer,
has_parent_tweet integer,
in_reply_to_screen_name text defualt '',
in_reply_to_status_id integer,
in_reply_to_status_id_str text default '',
in_reply_to_user_id integer,
in_reply_to_user_id_str text default '',
is_quote_status integer,
quote_id integer,
quote_id_str text,
quote_url text,
date_update text not null, date_update text not null,
PRIMARY KEY (id) PRIMARY KEY (id)
); );
""" """
cursor.execute(table_tweets) cursor.execute(table_tweets)
table_retweets = """ table_followers_names = """
CREATE TABLE IF NOT EXISTS
retweets(
user_id integer not null,
tweet_id integer not null,
CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_retweets)
table_mentions = """
CREATE TABLE IF NOT EXISTS
mentions(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT mentions_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor.execute(table_mentions)
table_replies = """
CREATE TABLE IF NOT EXISTS
replies(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT replies_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor.execute(table_replies)
table_tags = """
CREATE TABLE IF NOT EXISTS
tags(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT tags_pk PRIMARY KEY(tweet_id, id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id),
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor.execute(table_tags)
table_hashtags = """
CREATE TABLE IF NOT EXISTS
hashtags(
tweet_id integer not null,
tag_name text not null,
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_hashtags)
table_urls = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
urls( followers_names (
tweet_id integer not null, user text not null,
url text not null, date_update text not null,
CONSTRAINT urls_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id) follower text not null,
PRIMARY KEY (user, follower)
); );
"""
cursor.execute(table_urls)
table_photos = """
CREATE TABLE IF NOT EXISTS
photos(
tweet_id integer not null,
url text not null,
CONSTRAINT photos_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
""" """
cursor.execute(table_photos) cursor.execute(table_followers_names)
table_favorites = """ table_following_names = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
favorites( following_names (
user_id integer not null, user text not null,
tweet_id integer not null, date_update text not null,
CONSTRAINT favorites_pk PRIMARY KEY (user_id, tweet_id), follows text not null,
CONSTRAINT user_id_fk FOREIGN KEY (user_id) REFERENCES users(id), PRIMARY KEY (user, follows)
CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
); );
""" """
cursor.execute(table_favorites) cursor.execute(table_following_names)
table_followers = """ table_followers = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
followers ( followers (
id integer not null, id integer not null,
follower_id integer not null, name text,
CONSTRAINT followers_pk PRIMARY KEY (id, follower_id), username text not null,
CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id), bio text,
CONSTRAINT follower_id_fk FOREIGN KEY(follower_id) REFERENCES users(id) location,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private text not null,
verified text not null,
avatar text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (id, username, follower)
); );
""" """
cursor.execute(table_followers) cursor.execute(table_followers)
...@@ -201,35 +96,27 @@ def init(db): ...@@ -201,35 +96,27 @@ def init(db):
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
following ( following (
id integer not null, id integer not null,
following_id integer not null, name text,
CONSTRAINT following_pk PRIMARY KEY (id, following_id), username text not null,
CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id), bio text,
CONSTRAINT following_id_fk FOREIGN KEY(following_id) REFERENCES users(id) location text,
); url text,
""" join_date text not null,
cursor.execute(table_following) join_time text not null,
tweets integer,
table_followers_names = """ following integer,
CREATE TABLE IF NOT EXISTS followers integer,
followers_names ( likes integer,
user text not null, media integer,
date_update text not null, private text not null,
follower text not null, verified text not null,
PRIMARY KEY (user, follower) avatar text not null,
);
"""
cursor.execute(table_followers_names)
table_following_names = """
CREATE TABLE IF NOT EXISTS
following_names (
user text not null,
date_update text not null, date_update text not null,
follows text not null, follows text not null,
PRIMARY KEY (user, follows) PRIMARY KEY (id, username, follows)
); );
""" """
cursor.execute(table_following_names) cursor.execute(table_following)
return conn return conn
except Exception as e: except Exception as e:
...@@ -263,12 +150,11 @@ def follow(conn, Username, Followers, User): ...@@ -263,12 +150,11 @@ def follow(conn, Username, Followers, User):
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
pass pass
def user(conn, config, User): def user(conn, Username, Followers, User):
try: try:
date_time = str(datetime.now()) date_time = str(datetime.now())
cursor = conn.cursor() cursor = conn.cursor()
entry = (int(User.id), entry = (User.id,
User.id,
User.name, User.name,
User.username, User.username,
User.bio, User.bio,
...@@ -284,105 +170,37 @@ def user(conn, config, User): ...@@ -284,105 +170,37 @@ def user(conn, config, User):
User.is_private, User.is_private,
User.is_verified, User.is_verified,
User.avatar, User.avatar,
User.background_image, date_time,
date_time) Username,)
query = f"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" table = uTable(Followers)
query = f"INSERT INTO {table} VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
cursor.execute(query, entry) cursor.execute(query, entry)
if config.Followers or config.Following:
table = uTable(config.Followers)
query = f"INSERT INTO {table} VALUES(?,?)"
cursor.execute(query, (config.User_id, int(User.id)))
conn.commit() conn.commit()
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
pass pass
def get_user_id(conn, id):
cursor = conn.cursor()
cursor.execute('SELECT id FROM users WHERE id = ? LIMIT 1', (id,))
resultset = cursor.fetchall()
return resultset[0][0] if resultset else -1
def tweets(conn, Tweet, config): def tweets(conn, Tweet, config):
try: try:
date_time = str(datetime.now()) date_time = str(datetime.now())
cursor = conn.cursor() cursor = conn.cursor()
entry = (Tweet.id, entry = (Tweet.id,
Tweet.id_str, Tweet.user_id,
Tweet.tweet,
Tweet.conversation_id,
Tweet.datetime,
Tweet.datestamp, Tweet.datestamp,
Tweet.timestamp, Tweet.timestamp,
Tweet.timezone, Tweet.timezone,
Tweet.place,
Tweet.location, Tweet.location,
Tweet.replies_count,
Tweet.likes_count,
Tweet.retweets_count,
Tweet.user_id,
Tweet.user_id_str,
Tweet.username, Tweet.username,
Tweet.name, Tweet.tweet,
Tweet.profile_image_url, Tweet.replies,
Tweet.likes,
Tweet.retweets,
",".join(Tweet.hashtags),
Tweet.link, Tweet.link,
Tweet.gif_url, Tweet.retweet,
Tweet.gif_thumb, Tweet.user_rt,
Tweet.video_url, ",".join(Tweet.mentions),
Tweet.video_thumb,
Tweet.is_reply_to,
Tweet.has_parent_tweet,
Tweet.in_reply_to_screen_name,
Tweet.in_reply_to_status_id,
Tweet.in_reply_to_status_id_str,
Tweet.in_reply_to_user_id,
Tweet.in_reply_to_user_id_str,
Tweet.is_quote_status,
Tweet.quote_id,
Tweet.quote_id_str,
Tweet.quote_url,
date_time) date_time)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry) cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
if len(Tweet.mentions) > 0:
query = 'INSERT INTO mentions VALUES(?, ?, ?, ?)'
for mention in Tweet.mentions:
cursor.execute(query, (Tweet.id, mention["id"], mention["id_str"], mention["screen_name"]))
if len(Tweet.replies) > 0:
query = 'INSERT INTO replies VALUES(?, ?, ?, ?)'
for reply in Tweet.replies:
cursor.execute(query, (Tweet.id, reply["id"], reply["id_str"], reply["screen_name"]))
if len(Tweet.tags) > 0:
query = 'INSERT INTO tags VALUES(?, ?, ?, ?)'
for tag in Tweet.tags:
cursor.execute(query, (Tweet.id, tag["id"], tag["id_str"], tag["screen_name"]))
if len(Tweet.hashtags) > 0:
query = 'INSERT OR IGNORE INTO hashtags (tweet_id, tag_name) VALUES(?,?)'
for tag in Tweet.hashtags:
cursor.execute(query, (Tweet.id, tag))
if len(Tweet.urls) > 0:
query = 'INSERT INTO urls VALUES(?, ?)'
for url in Tweet.urls:
cursor.execute(query, (Tweet.id, url))
if len(Tweet.photos) > 0:
query = 'INSERT INTO photos VALUES(?, ?)'
for photo in Tweet.photos:
cursor.execute(query, (Tweet.id, photo))
if config.Favorites:
query = 'INSERT INTO favorites VALUES(?,?)'
cursor.execute(query, (config.User_id, Tweet.id))
if Tweet.retweet == 1:
query = 'INSERT INTO retweets VALUES(?,?)'
cursor.execute(query, (config.User_id, Tweet.id))
conn.commit() conn.commit()
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
pass pass
...@@ -171,37 +171,22 @@ def Tweet(Tweet, config): ...@@ -171,37 +171,22 @@ def Tweet(Tweet, config):
"_id": Tweet.id + "_raw_" + config.Essid, "_id": Tweet.id + "_raw_" + config.Essid,
"_source": { "_source": {
"id": Tweet.id, "id": Tweet.id,
"conversation_id": Tweet.conversation_id,
"created_at": Tweet.created_at,
"date": dt, "date": dt,
"timezone": Tweet.timezone, "timezone": Tweet.timezone,
"place": Tweet.place,
"location": Tweet.location, "location": Tweet.location,
"tweet": Tweet.tweet, "tweet": Tweet.tweet,
"hashtags": Tweet.hashtags, "hashtags": Tweet.hashtags,
"user_id": Tweet.user_id, "user_id": Tweet.user_id,
"user_id_str": Tweet.user_id_str,
"username": Tweet.username, "username": Tweet.username,
"name": Tweet.name,
"profile_image_url": Tweet.profile_image_url,
"day": day, "day": day,
"hour": hour(Tweet.datetime), "hour": hour(Tweet.datetime),
"link": Tweet.link, "link": Tweet.link,
"gif_url": Tweet.gif_url,
"gif_thumb": Tweet.gif_thumb,
"video_url": Tweet.video_url,
"video_thumb": Tweet.video_thumb,
"is_reply_to": Tweet.is_reply_to,
"has_parent_tweet": Tweet.has_parent_tweet,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": config.Essid, "essid": config.Essid,
"nlikes": int(Tweet.likes_count), "nlikes": int(Tweet.likes),
"nreplies": int(Tweet.replies_count), "nreplies": int(Tweet.replies),
"nretweets": int(Tweet.retweets_count), "nretweets": int(Tweet.retweets),
"is_quote_status": Tweet.is_quote_status,
"quote_id": Tweet.quote_id,
"quote_id_str": Tweet.quote_id_str,
"quote_url": Tweet.quote_url,
"search": str(config.Search) "search": str(config.Search)
} }
} }
...@@ -345,7 +330,6 @@ def UserProfile(user, config): ...@@ -345,7 +330,6 @@ def UserProfile(user, config):
"private": user.is_private, "private": user.is_private,
"verified": user.is_verified, "verified": user.is_verified,
"avatar": user.avatar, "avatar": user.avatar,
"background_image": user.background_image,
"session": config.Essid "session": config.Essid
} }
} }
......
from time import strftime, localtime from time import strftime, localtime
import json import re
#from datetime import datetime #from datetime import datetime
#import logging #import logging
...@@ -11,84 +11,16 @@ class tweet: ...@@ -11,84 +11,16 @@ class tweet:
def __init__(self): def __init__(self):
pass pass
def getRawURLS(tw, link, config):
player = tw.find_all("div","PlayableMedia-player")
gif_url, gif_thumb, video_url, video_thumb = "", "", "", ""
for node in player:
styles = node.attrs['style'].split()
for style in styles:
if style.startswith('background'):
tmp = "background-image:url('"
style = style.replace(tmp, "")
if "tweet_video_thumb" in style:
gif_url = style.replace("')",'')
gif_url = gif_url.replace('.jpg','.mp4')
gif_url = gif_url.replace('https://pbs','https://video')
gif_url = gif_url.replace("_thumb", "")
gif_thumb = style.replace("')", "")
else:
video_url, video_thumb = "video","video_thumb"
return gif_url, gif_thumb, video_url, video_thumb
def getMentions(tw): def getMentions(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions') #logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions')
"""Extract ment from tweet """Extract ment from tweet
""" """
mentions = [{"id":int(mention["data-mentioned-user-id"]),"id_str": mention["data-mentioned-user-id"],"screen_name":mention.get('href').split("/")[-1]} for mention in tw.find_all('a',{'class':'twitter-atreply'})]
return mentions
def getReplies(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getReplies')
"""Extract replies from tweet
"""
replyToUsersJSON = json.loads(tw.find("div")["data-reply-to-users-json"])
replies = [{"id":int(reply["id_str"]),"id_str": reply["id_str"],"screen_name":reply["screen_name"]} for reply in replyToUsersJSON]
return replies
def getTags(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTags')
"""Extract tags from tweet
"""
tags = []
try:
tag_links = tw.find("div","media-tagging-block").find_all("a","js-user-profile-link")
for tag in tag_links:
if tag.has_attr("data-user-id"):
tmpData = {
"id":int(tag["data-user-id"]),
"id_str": tag["data-user-id"],
"screen_name":tag.get('href').split("/")[-1]
}
tags.append(tmpData)
except:
tags = []
return tags
def getQuoteInfo(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getQuoteInfo')
"""Extract quote from tweet
"""
base_twitter = "https://twitter.com"
quote_status = 0
quote_id = 0
quote_id_str = ""
quote_url = ""
try: try:
quote = tw.find("div","QuoteTweet-innerContainer") mentions = tw.find("div", "js-original-tweet")["data-mentions"].split(" ")
quote_status = 1
quote_id = int(quote["data-item-id"])
quote_id_str = quote["data-item-id"]
quote_url = base_twitter + quote.get("href")
except: except:
quote_status = 0 mentions = ""
return quote_status, quote_id, quote_id_str, quote_url return mentions
def getText(tw): def getText(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getText') #logging.info("[<] " + str(datetime.now()) + ':: tweet+getText')
...@@ -101,6 +33,25 @@ def getText(tw): ...@@ -101,6 +33,25 @@ def getText(tw):
return text return text
def getTweet(tw, mentions):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTweet')
try:
text = getText(tw)
for i in range(len(mentions)):
mention = f"@{mentions[i]}"
if mention not in text:
text = f"{mention} {text}"
except:
text = getText(tw)
return text
def getHashtags(text):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getHashtags')
"""Get hashtags of tweet
"""
return re.findall(r'(?i)\#\w+', text, flags=re.UNICODE)
def getStat(tw, _type): def getStat(tw, _type):
"""Get stats about Tweet """Get stats about Tweet
""" """
...@@ -110,50 +61,42 @@ def getStat(tw, _type): ...@@ -110,50 +61,42 @@ def getStat(tw, _type):
def getRetweet(profile, username, user): def getRetweet(profile, username, user):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getRetweet') #logging.info("[<] " + str(datetime.now()) + ':: tweet+getRetweet')
if profile and username.lower() != user.lower(): if profile and username.lower() != user:
return 1 return True
def getUser_rt(profile, username, user):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getUser_rt')
"""Get username that retweeted
"""
if getRetweet(profile, username, user):
user_rt = user
else:
user_rt = "None"
return user_rt
def Tweet(tw, location, config): def Tweet(tw, location, config):
"""Create Tweet object """Create Tweet object
""" """
##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet') ##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet')
t = tweet() t = tweet()
t.id = int(tw.find("div")["data-item-id"]) t.id = tw.find("div")["data-item-id"]
t.id_str = tw.find("div")["data-item-id"]
t.conversation_id = tw.find("div")["data-conversation-id"]
t.datetime = int(tw.find("span", "_timestamp")["data-time"]) t.datetime = int(tw.find("span", "_timestamp")["data-time"])
t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime)) t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime))
t.timestamp = strftime("%H:%M:%S", localtime(t.datetime)) t.timestamp = strftime("%H:%M:%S", localtime(t.datetime))
t.user_id = int(tw.find("div")["data-user-id"]) t.user_id = tw.find("a", "account-group js-account-group js-action-profile js-user-profile-link js-nav")["data-user-id"]
t.user_id_str = tw.find("div")["data-user-id"] t.username = tw.find("span", "username").text.replace("@", "")
t.username = tw.find("div")["data-screen-name"]
t.name = tw.find("div")["data-name"]
t.profile_image_url = tw.find("img", "js-action-profile-avatar").get('src').replace("_bigger","")
t.place = tw.find("a","js-geo-pivot-link").text.strip() if tw.find("a","js-geo-pivot-link") else None
t.timezone = strftime("%Z", localtime()) t.timezone = strftime("%Z", localtime())
for img in tw.findAll("img", "Emoji Emoji--forText"): for img in tw.findAll("img", "Emoji Emoji--forText"):
img.replaceWith(img["alt"]) img.replaceWith(img["alt"])
t.mentions = getMentions(tw) t.mentions = getMentions(tw)
t.tags = getTags(tw) t.tweet = getTweet(tw, t.mentions)
t.replies = getReplies(tw)
t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.tweet = getText(tw)
t.location = location t.location = location
t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")] t.hashtags = getHashtags(t.tweet)
t.replies_count = getStat(tw, "reply") t.replies = getStat(tw, "reply")
t.retweets_count = getStat(tw, "retweet") t.retweets = getStat(tw, "retweet")
t.likes_count = getStat(tw, "favorite") t.likes = getStat(tw, "favorite")
t.link = f"https://twitter.com/{t.username}/status/{t.id}" t.link = f"https://twitter.com/{t.username}/status/{t.id}"
t.retweet = getRetweet(config.Profile, t.username, config.Username) t.retweet = getRetweet(config.Profile, t.username, config.Username)
t.gif_url, t.gif_thumb, t.video_url, t.video_thumb = getRawURLS(tw, t.link, config) t.user_rt = getUser_rt(config.Profile, t.username, config.Username)
t.is_quote_status, t.quote_id, t.quote_id_str, t.quote_url = getQuoteInfo(tw)
t.is_reply_to = int(bool(tw.find("div")["data-is-reply-to"])) if tw.find("div").has_attr("data-is-reply-to") else 0
t.has_parent_tweet = int(bool(tw.find("div")["data-has-parent-tweet"])) if tw.find("div").has_attr("data-has-parent-tweet") else 0
t.in_reply_to_screen_name = ""
t.in_reply_to_status_id = 0
t.in_reply_to_status_id_str = ""
t.in_reply_to_user_id = 0
t.in_reply_to_user_id_str = ""
return t return t
...@@ -20,10 +20,6 @@ def inf(ur, _type): ...@@ -20,10 +20,6 @@ def inf(ur, _type):
ret = group["data-screen-name"] ret = group["data-screen-name"]
elif _type == "private": elif _type == "private":
ret = group["data-protected"] ret = group["data-protected"]
if ret == 'true':
ret = 1
else:
ret = 0
return ret return ret
...@@ -32,18 +28,18 @@ def card(ur, _type): ...@@ -32,18 +28,18 @@ def card(ur, _type):
try: try:
ret = ur.find("p", "ProfileHeaderCard-bio u-dir").text.replace("\n", " ") ret = ur.find("p", "ProfileHeaderCard-bio u-dir").text.replace("\n", " ")
except: except:
ret = None ret = "None"
elif _type == "location": elif _type == "location":
try: try:
ret = ur.find("span", "ProfileHeaderCard-locationText u-dir").text ret = ur.find("span", "ProfileHeaderCard-locationText u-dir").text
ret = ret[15:].replace("\n", " ")[:-10] ret = ret[15:].replace("\n", " ")[:-10]
except: except:
ret = None ret = "None"
elif _type == "url": elif _type == "url":
try: try:
ret = ur.find("span", "ProfileHeaderCard-urlText u-dir").find("a")["title"] ret = ur.find("span", "ProfileHeaderCard-urlText u-dir").find("a")["title"]
except: except:
ret = None ret = "None"
return ret return ret
...@@ -58,13 +54,11 @@ def convertToInt(x): ...@@ -58,13 +54,11 @@ def convertToInt(x):
"b" : 1000000000, "b" : 1000000000,
} }
try : try :
if ',' in x:
x = x.replace(',', '')
y = int(x) y = int(x)
return y return y
except : except :
pass pass
try : try :
y = float(str(x)[:-1]) y = float(str(x)[:-1])
y = y * multDict[str(x)[-1:].lower()] y = y * multDict[str(x)[-1:].lower()]
...@@ -85,10 +79,11 @@ def stat(ur, _type): ...@@ -85,10 +79,11 @@ def stat(ur, _type):
def media(ur): def media(ur):
try: try:
media_count = ur.find("a", "PhotoRail-headingWithCount js-nav").text.strip().split(" ")[0] media_count = ur.find("a", "PhotoRail-headingWithCount js-nav").text
media_count = convertToInt(media_count) media_count = media_count.replace("\n", "")[32:].split(" ")[0]
media_count = convertToInt(media_count)
except: except:
media_count = 0 media_count = "0"
return media_count return media_count
...@@ -96,11 +91,11 @@ def verified(ur): ...@@ -96,11 +91,11 @@ def verified(ur):
try: try:
is_verified = ur.find("span", "ProfileHeaderCard-badges").text is_verified = ur.find("span", "ProfileHeaderCard-badges").text
if "Verified account" in is_verified: if "Verified account" in is_verified:
is_verified = 1 is_verified = "true"
else: else:
is_verified = 0 is_verified = "false"
except: except:
is_verified = 0 is_verified = "false"
return is_verified return is_verified
...@@ -124,5 +119,4 @@ def User(ur): ...@@ -124,5 +119,4 @@ def User(ur):
u.is_private = inf(ur, "private") u.is_private = inf(ur, "private")
u.is_verified = verified(ur) u.is_verified = verified(ur)
u.avatar = ur.find("img", "ProfileAvatar-image")["src"] u.avatar = ur.find("img", "ProfileAvatar-image")["src"]
u.background_image = ur.find('div',{'class':'ProfileCanopy-headerBg'}).find('img').get('src')
return u return u
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment