Commit 499f070f authored by andytnt's avatar andytnt

cleanup

parent a2ad7e62
...@@ -25,29 +25,29 @@ Some of the benefits of using Twint vs Twitter API: ...@@ -25,29 +25,29 @@ Some of the benefits of using Twint vs Twitter API:
## Installing ## Installing
### Stable version ### Stable version
**Pip:** **Pip:**
```bash ```bash
pip3 install twint pip3 install twint
``` ```
**Pipenv**: **Pipenv**:
```bash ```bash
pipenv install twint pipenv install twint
``` ```
### Development version ### Development version
**Git:** **Git:**
```bash ```bash
git clone https://github.com/twintproject/twint.git git clone https://github.com/twintproject/twint.git
pip3 install -r requirements.txt pip3 install -r requirements.txt
``` ```
**Pip:** **Pip:**
```bash ```bash
pip3 install --upgrade -e git+https://github.com/twintproject/twint.git@origin/master#egg=twint pip3 install --upgrade -e git+https://github.com/twintproject/twint.git@origin/master#egg=twint
``` ```
**Pipenv**: **Pipenv**:
```bash ```bash
pipenv install -e git+https://github.com/twintproject/twint.git#egg=twint pipenv install -e git+https://github.com/twintproject/twint.git#egg=twint
``` ```
...@@ -167,22 +167,6 @@ Twitter can shadow-ban accounts, which means that their tweets will not be avail ...@@ -167,22 +167,6 @@ Twitter can shadow-ban accounts, which means that their tweets will not be avail
`python Twint.py --userlist inputlist --user-full` `python Twint.py --userlist inputlist --user-full`
#### Only tweets without user info
> To get only tweets without user info
`python Twint.py -u username --profile-full` or `set c.User_info = False`
`python Twint.py -u username` or `set c.User_info = False`
#### Tweets with user info works ONLY with a Database (currently)
> To get tweets along with user info of users mentioned in tweet/replied to
`python Twint.py -u username --user-info -db database.db`
`python Twint.py -u username --profile-full --user-info -db database.db`
## Contact ## Contact
If you have any questions, want to join in on discussions, or need extra help, you are welcome to join our OSINT focused [Slack server](https://join.slack.com/t/os-int/shared_invite/enQtNDI1MDA2OTg4MDg0LWUxYWNmMjI2MGFlMTZjZjhmOWY1ZTVhNmFiMDU2NzY1MzhiMDI2ZTZmYmEwY2MxY2YzMGFkZTY2MTcxZWI2ODM). If you have any questions, want to join in on discussions, or need extra help, you are welcome to join our OSINT focused [Slack server](https://join.slack.com/t/os-int/shared_invite/enQtNDI1MDA2OTg4MDg0LWUxYWNmMjI2MGFlMTZjZjhmOWY1ZTVhNmFiMDU2NzY1MzhiMDI2ZTZmYmEwY2MxY2YzMGFkZTY2MTcxZWI2ODM).
...@@ -83,7 +83,6 @@ def initialize(args): ...@@ -83,7 +83,6 @@ def initialize(args):
c.Essid = args.essid c.Essid = args.essid
c.Format = args.format c.Format = args.format
c.User_full = args.user_full c.User_full = args.user_full
c.User_info = args.user_info
c.Profile_full = args.profile_full c.Profile_full = args.profile_full
c.Store_pandas = args.store_pandas c.Store_pandas = args.store_pandas
c.Pandas_type = args.pandas_type c.Pandas_type = args.pandas_type
...@@ -155,7 +154,6 @@ def options(): ...@@ -155,7 +154,6 @@ def options():
ap.add_argument("--user-full", ap.add_argument("--user-full",
help="Collect all user information (Use with followers or following only).", help="Collect all user information (Use with followers or following only).",
action="store_true") action="store_true")
ap.add_argument("--user-info", help="Scrape user's info in tweet", action="store_true")
ap.add_argument("--profile-full", ap.add_argument("--profile-full",
help="Slow, but effective method of collecting a user's Tweets and RT.", help="Slow, but effective method of collecting a user's Tweets and RT.",
action="store_true") action="store_true")
......
...@@ -20,20 +20,11 @@ PUT twinttweets ...@@ -20,20 +20,11 @@ PUT twinttweets
"day": {"type": "integer"}, "day": {"type": "integer"},
"hour": {"type": "integer"}, "hour": {"type": "integer"},
"link": {"type": "text"}, "link": {"type": "text"},
"gif_url": {"type": "text"},
"gif_thumb": {"type": "text"},
"video_url": {"type": "text"},
"video_thumb": {"type": "text"},
"is_reply_to": {"type": "long"},
"has_parent_tweet": {"type": "long"},
"retweet": {"type": "text"}, "retweet": {"type": "text"},
"essid": {"type": "keyword"}, "essid": {"type": "keyword"},
"nlikes": {"type": "integer"}, "nlikes": {"type": "integer"},
"nreplies": {"type": "integer"}, "nreplies": {"type": "integer"},
"nretweets": {"type": "integer"}, "nretweets": {"type": "integer"},
"is_quote_status": {"type": "long"},
"quote_id": {"type": "long"},
"quote_id_str": {"type": "text"},
"quote_url": {"type": "text"}, "quote_url": {"type": "text"},
"search": {"type": "text"}, "search": {"type": "text"},
"near": {"type": "text"}, "near": {"type": "text"},
......
...@@ -33,7 +33,6 @@ class Config: ...@@ -33,7 +33,6 @@ class Config:
Favorites = False Favorites = False
TwitterSearch = False TwitterSearch = False
User_full = False User_full = False
User_info = False
Profile_full = False Profile_full = False
Store_object = False Store_object = False
Store_pandas = False Store_pandas = False
......
...@@ -46,7 +46,7 @@ def _output(obj, output, config, **extra): ...@@ -46,7 +46,7 @@ def _output(obj, output, config, **extra):
else: else:
obj.username = obj.username.lower() obj.username = obj.username.lower()
for i in range(len(obj.mentions)): for i in range(len(obj.mentions)):
obj.mentions[i] = obj.mentions[i]["screen_name"].lower() obj.mentions[i] = obj.mentions[i].lower()
for i in range(len(obj.hashtags)): for i in range(len(obj.hashtags)):
obj.hashtags[i] = obj.hashtags[i].lower() obj.hashtags[i] = obj.hashtags[i].lower()
if config.Output != None: if config.Output != None:
...@@ -87,51 +87,6 @@ async def checkData(tweet, location, config, conn): ...@@ -87,51 +87,6 @@ async def checkData(tweet, location, config, conn):
if copyright is None and is_tweet(tweet): if copyright is None and is_tweet(tweet):
tweet = Tweet(tweet, location, config) tweet = Tweet(tweet, location, config)
if config.Database is not None and config.User_info:
for user in tweet.mentions:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
for user in tweet.tags:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
for user in tweet.replies:
if db.get_user_id(conn, user["id"]) == -1 and user["id"] not in user_ids:
user_ids.add(user["id"])
usernames.append(user["screen_name"])
if config.Database is not None and config.User_info:
for user in usernames:
url = f"http://twitter.com/{user}?lang=en"
await get.User(url, config, conn)
if config.User_info:
for user in tweet.mentions:
try:
_duplicate_dict[user["screen_name"]]
except KeyError:
_duplicate_dict[user["screen_name"]] = True
_user = user["screen_name"]
url = f"http://twitter.com/{_user}?lang=en"
await get.User(url, config, conn)
for user in tweet.tags:
try:
_duplicate_dict[user["screen_name"]]
except KeyError:
_duplicate_dict[user["screen_name"]] = True
_user = user["screen_name"]
url = f"http://twitter.com/{_user}?lang=en"
await get.User(url, config, conn)
for user in tweet.replies:
try:
_duplicate_dict[user["screen_name"]]
except KeyError:
_duplicate_dict[user["screen_name"]] = True
_user = user["screen_name"]
url = f"http://twitter.com/{_user}?lang=en"
await get.User(url, config, conn)
if datecheck(tweet.datestamp, config): if datecheck(tweet.datestamp, config):
output = format.Tweet(config, tweet) output = format.Tweet(config, tweet)
......
from datetime import datetime
import sqlite3 import sqlite3
import sys import sys
import time
def Conn(database): def Conn(database):
if database: if database:
...@@ -40,7 +40,7 @@ def init(db): ...@@ -40,7 +40,7 @@ def init(db):
verified integer not null, verified integer not null,
profile_image_url text not null, profile_image_url text not null,
background_image text, background_image text,
date_update text not null, time_update integer not null,
CONSTRAINT users_pk PRIMARY KEY (id) CONSTRAINT users_pk PRIMARY KEY (id)
); );
""" """
...@@ -68,22 +68,12 @@ def init(db): ...@@ -68,22 +68,12 @@ def init(db):
name text default '', name text default '',
profile_image_url text, profile_image_url text,
link text, link text,
gif_url text, mentions text,
gif_thumb text, hashtags text,
video_url text, urls text,
video_thumb text, photos text,
is_reply_to integer,
has_parent_tweet integer,
in_reply_to_screen_name text defualt '',
in_reply_to_status_id integer,
in_reply_to_status_id_str text default '',
in_reply_to_user_id integer,
in_reply_to_user_id_str text default '',
is_quote_status integer,
quote_id integer,
quote_id_str text,
quote_url text, quote_url text,
date_update text not null, time_update integer not null,
PRIMARY KEY (id) PRIMARY KEY (id)
); );
""" """
...@@ -101,78 +91,6 @@ def init(db): ...@@ -101,78 +91,6 @@ def init(db):
""" """
cursor.execute(table_retweets) cursor.execute(table_retweets)
table_mentions = """
CREATE TABLE IF NOT EXISTS
mentions(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT mentions_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor.execute(table_mentions)
table_replies = """
CREATE TABLE IF NOT EXISTS
replies(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT replies_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor.execute(table_replies)
table_tags = """
CREATE TABLE IF NOT EXISTS
tags(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT tags_pk PRIMARY KEY(tweet_id, id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id),
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor.execute(table_tags)
table_hashtags = """
CREATE TABLE IF NOT EXISTS
hashtags(
tweet_id integer not null,
tag_name text not null,
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_hashtags)
table_urls = """
CREATE TABLE IF NOT EXISTS
urls(
tweet_id integer not null,
url text not null,
CONSTRAINT urls_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_urls)
table_photos = """
CREATE TABLE IF NOT EXISTS
photos(
tweet_id integer not null,
url text not null,
CONSTRAINT photos_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_photos)
table_favorites = """ table_favorites = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
favorites( favorites(
...@@ -213,7 +131,7 @@ def init(db): ...@@ -213,7 +131,7 @@ def init(db):
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
followers_names ( followers_names (
user text not null, user text not null,
date_update text not null, time_update integer not null,
follower text not null, follower text not null,
PRIMARY KEY (user, follower) PRIMARY KEY (user, follower)
); );
...@@ -224,7 +142,7 @@ def init(db): ...@@ -224,7 +142,7 @@ def init(db):
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
following_names ( following_names (
user text not null, user text not null,
date_update text not null, time_update integer not null,
follows text not null, follows text not null,
PRIMARY KEY (user, follows) PRIMARY KEY (user, follows)
); );
...@@ -253,9 +171,9 @@ def uTable(Followers): ...@@ -253,9 +171,9 @@ def uTable(Followers):
def follow(conn, Username, Followers, User): def follow(conn, Username, Followers, User):
try: try:
date_time = str(datetime.now()) time_ms = round(time.time()*1000)
cursor = conn.cursor() cursor = conn.cursor()
entry = (User, date_time, Username,) entry = (User, time_ms, Username,)
table = fTable(Followers) table = fTable(Followers)
query = f"INSERT INTO {table} VALUES(?,?,?)" query = f"INSERT INTO {table} VALUES(?,?,?)"
cursor.execute(query, entry) cursor.execute(query, entry)
...@@ -263,9 +181,15 @@ def follow(conn, Username, Followers, User): ...@@ -263,9 +181,15 @@ def follow(conn, Username, Followers, User):
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
pass pass
def get_user_id(conn, id):
cursor = conn.cursor()
cursor.execute('SELECT id FROM users WHERE id = ? LIMIT 1', (id,))
resultset = cursor.fetchall()
return resultset[0][0] if resultset else -1
def user(conn, config, User): def user(conn, config, User):
try: try:
date_time = str(datetime.now()) time_ms = round(time.time()*1000)
cursor = conn.cursor() cursor = conn.cursor()
entry = (int(User.id), entry = (int(User.id),
User.id, User.id,
...@@ -285,7 +209,7 @@ def user(conn, config, User): ...@@ -285,7 +209,7 @@ def user(conn, config, User):
User.is_verified, User.is_verified,
User.avatar, User.avatar,
User.background_image, User.background_image,
date_time) time_ms)
query = f"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" query = f"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
cursor.execute(query, entry) cursor.execute(query, entry)
...@@ -298,15 +222,9 @@ def user(conn, config, User): ...@@ -298,15 +222,9 @@ def user(conn, config, User):
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
pass pass
def get_user_id(conn, id):
cursor = conn.cursor()
cursor.execute('SELECT id FROM users WHERE id = ? LIMIT 1', (id,))
resultset = cursor.fetchall()
return resultset[0][0] if resultset else -1
def tweets(conn, Tweet, config): def tweets(conn, Tweet, config):
try: try:
date_time = str(datetime.now()) time_ms = round(time.time()*1000)
cursor = conn.cursor() cursor = conn.cursor()
entry = (Tweet.id, entry = (Tweet.id,
Tweet.id_str, Tweet.id_str,
...@@ -327,53 +245,13 @@ def tweets(conn, Tweet, config): ...@@ -327,53 +245,13 @@ def tweets(conn, Tweet, config):
Tweet.name, Tweet.name,
Tweet.profile_image_url, Tweet.profile_image_url,
Tweet.link, Tweet.link,
Tweet.gif_url, ",".join(Tweet.mentions),
Tweet.gif_thumb, ",".join(Tweet.hashtags),
Tweet.video_url, ",".join(Tweet.urls),
Tweet.video_thumb, ",".join(Tweet.photos),
Tweet.is_reply_to,
Tweet.has_parent_tweet,
Tweet.in_reply_to_screen_name,
Tweet.in_reply_to_status_id,
Tweet.in_reply_to_status_id_str,
Tweet.in_reply_to_user_id,
Tweet.in_reply_to_user_id_str,
Tweet.is_quote_status,
Tweet.quote_id,
Tweet.quote_id_str,
Tweet.quote_url, Tweet.quote_url,
date_time) time_ms)
cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry) cursor.execute('INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', entry)
if len(Tweet.mentions) > 0:
query = 'INSERT INTO mentions VALUES(?, ?, ?, ?)'
for mention in Tweet.mentions:
cursor.execute(query, (Tweet.id, mention["id"], mention["id_str"], mention["screen_name"]))
if len(Tweet.replies) > 0:
query = 'INSERT INTO replies VALUES(?, ?, ?, ?)'
for reply in Tweet.replies:
cursor.execute(query, (Tweet.id, reply["id"], reply["id_str"], reply["screen_name"]))
if len(Tweet.tags) > 0:
query = 'INSERT INTO tags VALUES(?, ?, ?, ?)'
for tag in Tweet.tags:
cursor.execute(query, (Tweet.id, tag["id"], tag["id_str"], tag["screen_name"]))
if len(Tweet.hashtags) > 0:
query = 'INSERT OR IGNORE INTO hashtags (tweet_id, tag_name) VALUES(?,?)'
for tag in Tweet.hashtags:
cursor.execute(query, (Tweet.id, tag))
if len(Tweet.urls) > 0:
query = 'INSERT INTO urls VALUES(?, ?)'
for url in Tweet.urls:
cursor.execute(query, (Tweet.id, url))
if len(Tweet.photos) > 0:
query = 'INSERT INTO photos VALUES(?, ?)'
for photo in Tweet.photos:
cursor.execute(query, (Tweet.id, photo))
if config.Favorites: if config.Favorites:
query = 'INSERT INTO favorites VALUES(?,?)' query = 'INSERT INTO favorites VALUES(?,?)'
......
...@@ -74,20 +74,11 @@ def createIndex(config, instance, **scope): ...@@ -74,20 +74,11 @@ def createIndex(config, instance, **scope):
"day": {"type": "integer"}, "day": {"type": "integer"},
"hour": {"type": "integer"}, "hour": {"type": "integer"},
"link": {"type": "text"}, "link": {"type": "text"},
"gif_url": {"type": "text"},
"gif_thumb": {"type": "text"},
"video_url": {"type": "text"},
"video_thumb": {"type": "text"},
"is_reply_to": {"type": "long"},
"has_parent_tweet": {"type": "long"},
"retweet": {"type": "text"}, "retweet": {"type": "text"},
"essid": {"type": "keyword"}, "essid": {"type": "keyword"},
"nlikes": {"type": "integer"}, "nlikes": {"type": "integer"},
"nreplies": {"type": "integer"}, "nreplies": {"type": "integer"},
"nretweets": {"type": "integer"}, "nretweets": {"type": "integer"},
"is_quote_status": {"type": "long"},
"quote_id": {"type": "long"},
"quote_id_str": {"type": "text"},
"quote_url": {"type": "text"}, "quote_url": {"type": "text"},
"search": {"type": "text"}, "search": {"type": "text"},
"near": {"type": "text"}, "near": {"type": "text"},
...@@ -223,20 +214,11 @@ def Tweet(Tweet, config): ...@@ -223,20 +214,11 @@ def Tweet(Tweet, config):
"day": day, "day": day,
"hour": hour(Tweet.datetime), "hour": hour(Tweet.datetime),
"link": Tweet.link, "link": Tweet.link,
"gif_url": Tweet.gif_url,
"gif_thumb": Tweet.gif_thumb,
"video_url": Tweet.video_url,
"video_thumb": Tweet.video_thumb,
"is_reply_to": Tweet.is_reply_to,
"has_parent_tweet": Tweet.has_parent_tweet,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"essid": config.Essid, "essid": config.Essid,
"nlikes": int(Tweet.likes_count), "nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count), "nreplies": int(Tweet.replies_count),
"nretweets": int(Tweet.retweets_count), "nretweets": int(Tweet.retweets_count),
"is_quote_status": Tweet.is_quote_status,
"quote_id": Tweet.quote_id,
"quote_id_str": Tweet.quote_id_str,
"quote_url": Tweet.quote_url, "quote_url": Tweet.quote_url,
"search": str(config.Search), "search": str(config.Search),
"near": config.Near "near": config.Near
......
...@@ -78,19 +78,10 @@ def update(object, config): ...@@ -78,19 +78,10 @@ def update(object, config):
"day": day, "day": day,
"hour": hour(Tweet.datetime), "hour": hour(Tweet.datetime),
"link": Tweet.link, "link": Tweet.link,
"gif_url": Tweet.gif_url,
"gif_thumb": Tweet.gif_thumb,
"video_url": Tweet.video_url,
"video_thumb": Tweet.video_thumb,
"is_reply_to": Tweet.is_reply_to,
"has_parent_tweet": Tweet.has_parent_tweet,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"nlikes": int(Tweet.likes_count), "nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count), "nreplies": int(Tweet.replies_count),
"nretweets": int(Tweet.retweets_count), "nretweets": int(Tweet.retweets_count),
"is_quote_status": Tweet.is_quote_status,
"quote_id": Tweet.quote_id,
"quote_id_str": Tweet.quote_id_str,
"quote_url": Tweet.quote_url, "quote_url": Tweet.quote_url,
"search": str(config.Search), "search": str(config.Search),
"near": config.Near "near": config.Near
......
...@@ -11,8 +11,6 @@ def tweetData(t): ...@@ -11,8 +11,6 @@ def tweetData(t):
"name": t.name, "name": t.name,
"place": t.place, "place": t.place,
"tweet": t.tweet, "tweet": t.tweet,
"tags": t.tags,
"replies": t.replies,
"mentions": t.mentions, "mentions": t.mentions,
"urls": t.urls, "urls": t.urls,
"photos": t.photos, "photos": t.photos,
...@@ -23,15 +21,7 @@ def tweetData(t): ...@@ -23,15 +21,7 @@ def tweetData(t):
"hashtags": t.hashtags, "hashtags": t.hashtags,
"link": t.link, "link": t.link,
"retweet": t.retweet, "retweet": t.retweet,
"gif_url": t.gif_url, "quote_url": t.quote_url
"gif_thumb": t.gif_thumb,
"video_url": t.video_url,
"video_thumb": t.video_thumb,
"is_quote_status": t.is_quote_status,
"quote_id": t.quote_id,
"quote_url": t.quote_url,
"is_reply_to": t.is_reply_to,
"has_parent_tweet": t.has_parent_tweet
} }
return data return data
...@@ -48,8 +38,6 @@ def tweetFieldnames(): ...@@ -48,8 +38,6 @@ def tweetFieldnames():
"name", "name",
"place", "place",
"tweet", "tweet",
"tags",
"replies",
"mentions", "mentions",
"urls", "urls",
"photos", "photos",
...@@ -60,15 +48,7 @@ def tweetFieldnames(): ...@@ -60,15 +48,7 @@ def tweetFieldnames():
"hashtags", "hashtags",
"link", "link",
"retweet", "retweet",
"gif_url", "quote_url"
"gif_thumb",
"video_url",
"video_thumb",
"is_quote_status",
"quote_id",
"quote_url",
"is_reply_to",
"has_parent_tweet"
] ]
return fieldnames return fieldnames
......
...@@ -11,84 +11,30 @@ class tweet: ...@@ -11,84 +11,30 @@ class tweet:
def __init__(self): def __init__(self):
pass pass
def getRawURLS(tw, link, config):
player = tw.find_all("div","PlayableMedia-player")
gif_url, gif_thumb, video_url, video_thumb = "", "", "", ""
for node in player:
styles = node.attrs['style'].split()
for style in styles:
if style.startswith('background'):
tmp = "background-image:url('"
style = style.replace(tmp, "")
if "tweet_video_thumb" in style:
gif_url = style.replace("')",'')
gif_url = gif_url.replace('.jpg','.mp4')
gif_url = gif_url.replace('https://pbs','https://video')
gif_url = gif_url.replace("_thumb", "")
gif_thumb = style.replace("')", "")
else:
video_url, video_thumb = "video","video_thumb"
return gif_url, gif_thumb, video_url, video_thumb
def getMentions(tw): def getMentions(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions') #logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions')
"""Extract ment from tweet """Extract ment from tweet
""" """
mentions = [{"id":int(mention["data-mentioned-user-id"]),"id_str": mention["data-mentioned-user-id"],"screen_name":mention.get('href').split("/")[-1]} for mention in tw.find_all('a',{'class':'twitter-atreply'})]
return mentions
def getReplies(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getReplies')
"""Extract replies from tweet
"""
replyToUsersJSON = json.loads(tw["data-reply-to-users-json"])
replies = [{"id":int(reply["id_str"]),"id_str": reply["id_str"],"screen_name":reply["screen_name"]} for reply in replyToUsersJSON]
return replies
def getTags(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTags')
"""Extract tags from tweet
"""
tags = []
try: try:
tag_links = tw.find("div","media-tagging-block").find_all("a","js-user-profile-link") mentions = tw["data-mentions"].split(" ")
for tag in tag_links:
if tag.has_attr("data-user-id"):
tmpData = {
"id":int(tag["data-user-id"]),
"id_str": tag["data-user-id"],
"screen_name":tag.get('href').split("/")[-1]
}
tags.append(tmpData)
except: except:
tags = [] mentions = ""
return tags return mentions
def getQuoteInfo(tw): def getQuoteURL(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getQuoteInfo') #logging.info("[<] " + str(datetime.now()) + ':: tweet+getQuoteInfo')
"""Extract quote from tweet """Extract quote from tweet
""" """
base_twitter = "https://twitter.com" base_twitter = "https://twitter.com"
quote_status = 0
quote_id = 0
quote_id_str = ""
quote_url = "" quote_url = ""
try: try:
quote = tw.find("div","QuoteTweet-innerContainer") quote = tw.find("div","QuoteTweet-innerContainer")
quote_status = 1
quote_id = int(quote["data-item-id"])
quote_id_str = quote["data-item-id"]
quote_url = base_twitter + quote.get("href") quote_url = base_twitter + quote.get("href")
except: except:
quote_status = 0 quote_url = ""
return quote_status, quote_id, quote_id_str, quote_url return quote_url
def getText(tw): def getText(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getText') #logging.info("[<] " + str(datetime.now()) + ':: tweet+getText')
...@@ -134,8 +80,6 @@ def Tweet(tw, location, config): ...@@ -134,8 +80,6 @@ def Tweet(tw, location, config):
for img in tw.findAll("img", "Emoji Emoji--forText"): for img in tw.findAll("img", "Emoji Emoji--forText"):
img.replaceWith(img["alt"]) img.replaceWith(img["alt"])
t.mentions = getMentions(tw) t.mentions = getMentions(tw)
t.tags = getTags(tw)
t.replies = getReplies(tw)
t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")] t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")] t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.tweet = getText(tw) t.tweet = getText(tw)
...@@ -146,13 +90,5 @@ def Tweet(tw, location, config): ...@@ -146,13 +90,5 @@ def Tweet(tw, location, config):
t.likes_count = getStat(tw, "favorite") t.likes_count = getStat(tw, "favorite")
t.link = f"https://twitter.com/{t.username}/status/{t.id}" t.link = f"https://twitter.com/{t.username}/status/{t.id}"
t.retweet = getRetweet(config.Profile, t.username, config.Username) t.retweet = getRetweet(config.Profile, t.username, config.Username)
t.gif_url, t.gif_thumb, t.video_url, t.video_thumb = getRawURLS(tw, t.link, config) t.quote_url = getQuoteURL(tw)
t.is_quote_status, t.quote_id, t.quote_id_str, t.quote_url = getQuoteInfo(tw)
t.is_reply_to = int(bool(tw["data-is-reply-to"])) if tw.has_attr("data-is-reply-to") else 0
t.has_parent_tweet = int(bool(tw["data-has-parent-tweet"])) if tw.has_attr("data-has-parent-tweet") else 0
t.in_reply_to_screen_name = ""
t.in_reply_to_status_id = 0
t.in_reply_to_status_id_str = ""
t.in_reply_to_user_id = 0
t.in_reply_to_user_id_str = ""
return t return t
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment