Commit 84962bb6, authored by Francesco Poldi, committed by GitHub

Finished dataframes storing option (#224)

* Update (#174)

* add function to clean accumulated pandas storage data

* Fixed typo, dataname, removed attributes

* Added config options and config var

* Added autoclean

Works for search now

* Added Elasticsearch count options

* Added silent output and objects for users and followers

* Update

* Clean following/followers attr

* Final construct of object

* Redesign

* Little fix

* Debug

* Debug

* Globals

* Removed debug

* Globals pt 2

* Mix

* Added _old_obj to store previous scrape

* Prefix

* Pre fix pt 2

* commented

* Fix for object follow

* Update

* Update

* Completed follow_object

* Pandas object for followers and following

* Finished pandas object for followers and following

* Added docstrings in Twint.py

* Added lowercase

#170

* Finished lower case

Close #170

* Fix defaults

* Added some edits

In `panda.py`, changed the structure of the dataframe for users that one is following or followed by; in `config.py`, added autoupdate so that one does not have to call `storage.panda.get()` at every run; in `output.py`, edited follow_object; in `run.py`, added an autoupdate function for panda; in `tweet.py`, just some docstrings.

* Update for panda and objects

* Finished storing data into dataframes #173

Now followers, following, tweets, and user details are saved in dataframes
parent 421205c2
...@@ -8,17 +8,20 @@ https://github.com/haccer/twint/wiki ...@@ -8,17 +8,20 @@ https://github.com/haccer/twint/wiki
Licensed under MIT License Licensed under MIT License
Copyright (c) 2018 Cody Zacharias Copyright (c) 2018 Cody Zacharias
''' '''
import argparse
import twint
import sys import sys
import os import os
import argparse
import twint
def error(error, message): def error(_error, message):
print("[-] {}: {}".format(error, message)) """ Print errors to stdout
"""
print("[-] {}: {}".format(_error, message))
sys.exit(0) sys.exit(0)
def check(args): def check(args):
# Error checking """ Error checking
"""
if args.username is not None: if args.username is not None:
if args.verified: if args.verified:
error("Contradicting Args", error("Contradicting Args",
...@@ -42,8 +45,9 @@ def check(args): ...@@ -42,8 +45,9 @@ def check(args):
# Proxy stuff # Proxy stuff
if args.proxy_host is not None: if args.proxy_host is not None:
import socks
import socket
if args.proxy_host.lower() == "tor": if args.proxy_host.lower() == "tor":
import socks, socket
socks.set_default_proxy(socks.SOCKS5, "localhost", 9050) socks.set_default_proxy(socks.SOCKS5, "localhost", 9050)
socket.socket = socks.socksocket socket.socket = socks.socksocket
elif args.proxy_port and args.proxy_type: elif args.proxy_port and args.proxy_type:
...@@ -55,7 +59,6 @@ def check(args): ...@@ -55,7 +59,6 @@ def check(args):
_type = socks.HTTP _type = socks.HTTP
else: else:
error("Error", "Proxy types allowed are: socks5, socks4, and http.") error("Error", "Proxy types allowed are: socks5, socks4, and http.")
import socks, socket
socks.set_default_proxy(_type, args.proxy_host, int(args.proxy_port)) socks.set_default_proxy(_type, args.proxy_host, int(args.proxy_port))
socket.socket = socks.socksocket socket.socket = socks.socksocket
else: else:
...@@ -64,20 +67,23 @@ def check(args): ...@@ -64,20 +67,23 @@ def check(args):
if args.proxy_port or args.proxy_type: if args.proxy_port or args.proxy_type:
error("Error", "Please specify --proxy-host, --proxy-port, and --proxy-type") error("Error", "Please specify --proxy-host, --proxy-port, and --proxy-type")
def loadUserList(ul, type): def loadUserList(ul, _type):
""" Concatenate users
"""
if os.path.exists(os.path.abspath(ul)): if os.path.exists(os.path.abspath(ul)):
userlist = open(os.path.abspath(ul), "r").read().splitlines() userlist = open(os.path.abspath(ul), "r").read().splitlines()
else: else:
userlist = ul.split(",") userlist = ul.split(",")
if type == "search": if _type == "search":
un = "" un = ""
for user in userlist: for user in userlist:
un += "%20OR%20from%3A" + user un += "%20OR%20from%3A" + user
return un[15:] return un[15:]
else:
return userlist return userlist
def initialize(args): def initialize(args):
""" Set default values for config from args
"""
c = twint.Config() c = twint.Config()
c.Username = args.username c.Username = args.username
c.User_id = args.userid c.User_id = args.userid
...@@ -123,9 +129,12 @@ def initialize(args): ...@@ -123,9 +129,12 @@ def initialize(args):
c.Media = args.media c.Media = args.media
c.Replies = args.replies c.Replies = args.replies
c.Pandas_clean = args.pandas_clean c.Pandas_clean = args.pandas_clean
c.ES_count = {"likes":True, "replies":True, "retweets":True}
return c return c
def options(): def options():
""" Parse arguments
"""
ap = argparse.ArgumentParser(prog="Twint.py", ap = argparse.ArgumentParser(prog="Twint.py",
usage="python3 %(prog)s [options]", usage="python3 %(prog)s [options]",
description="TWINT - An Advanced Twitter Scraping Tool.") description="TWINT - An Advanced Twitter Scraping Tool.")
...@@ -151,7 +160,8 @@ def options(): ...@@ -151,7 +160,8 @@ def options():
ap.add_argument("--limit", help="Number of Tweets to pull (Increments of 20).") ap.add_argument("--limit", help="Number of Tweets to pull (Increments of 20).")
ap.add_argument("--count", help="Display number of Tweets scraped at the end of session.", ap.add_argument("--count", help="Display number of Tweets scraped at the end of session.",
action="store_true") action="store_true")
ap.add_argument("--stats", help="Show number of replies, retweets, and likes.", action="store_true") ap.add_argument("--stats", help="Show number of replies, retweets, and likes.",
action="store_true")
ap.add_argument("--hostname", help="Store the mysql database host") ap.add_argument("--hostname", help="Store the mysql database host")
ap.add_argument("-db", "--database", help="Store Tweets in a sqlite3 or mysql database.") ap.add_argument("-db", "--database", help="Store Tweets in a sqlite3 or mysql database.")
ap.add_argument("--DB_user", help="Store the mysql database user") ap.add_argument("--DB_user", help="Store the mysql database user")
...@@ -164,53 +174,71 @@ def options(): ...@@ -164,53 +174,71 @@ def options():
ap.add_argument("--proxy-type", help="Socks5, HTTP, etc.") ap.add_argument("--proxy-type", help="Socks5, HTTP, etc.")
ap.add_argument("--proxy-host", help="Proxy hostname or IP.") ap.add_argument("--proxy-host", help="Proxy hostname or IP.")
ap.add_argument("--proxy-port", help="The port of the proxy server.") ap.add_argument("--proxy-port", help="The port of the proxy server.")
ap.add_argument("--essid", help="Elasticsearch Session ID, use this to differentiate scraping sessions.") ap.add_argument("--essid",
help="Elasticsearch Session ID, use this to differentiate scraping sessions.",
nargs="?", default="")
ap.add_argument("--userlist", help="Userlist from list or file.") ap.add_argument("--userlist", help="Userlist from list or file.")
ap.add_argument("--retweets", help="Include user's Retweets (Warning: limited).", action="store_true") ap.add_argument("--retweets",
help="Include user's Retweets (Warning: limited).",
action="store_true")
ap.add_argument("--format", help="Custom output format (See wiki for details).") ap.add_argument("--format", help="Custom output format (See wiki for details).")
ap.add_argument("--user-full", help="Collect all user information (Use with followers or following only).", ap.add_argument("--user-full",
help="Collect all user information (Use with followers or following only).",
action="store_true") action="store_true")
ap.add_argument("--profile-full", ap.add_argument("--profile-full",
help="Slow, but effective method of collecting a user's Tweets (Including Retweets).", help="Slow, but effective method of collecting a user's Tweets and RT.",
action="store_true") action="store_true")
ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.") ap.add_argument("--store-pandas", help="Save Tweets in a DataFrame (Pandas) file.")
ap.add_argument("--pandas-type", help="Specify HDF5 or Pickle (HDF5 as default)") ap.add_argument("--pandas-type",
ap.add_argument("--search_name", help="Name for identify the search like -3dprinter stuff- only for mysql") help="Specify HDF5 or Pickle (HDF5 as default)", nargs="?", default="HDF5")
ap.add_argument("-it", "--index-tweets", help="Custom Elasticsearch Index name for Tweets.") ap.add_argument("--search_name",
ap.add_argument("-if", "--index-follow", help="Custom Elasticsearch Index name for Follows.") help="Name for identify the search like -3dprinter stuff- only for mysql")
ap.add_argument("-iu", "--index-users", help="Custom Elasticsearch Index name for Users.") ap.add_argument("-it", "--index-tweets",
ap.add_argument("--debug", help="Store information in debug logs", action="store_true") help="Custom Elasticsearch Index name for Tweets.", nargs="?", default="twint")
ap.add_argument("-if", "--index-follow",
help="Custom Elasticsearch Index name for Follows.",
nargs="?", default="twintGraph")
ap.add_argument("-iu", "--index-users", help="Custom Elasticsearch Index name for Users.",
nargs="?", default="twintUser")
ap.add_argument("--debug",
help="Store information in debug logs", action="store_true")
ap.add_argument("--resume", help="Resume from Tweet ID.") ap.add_argument("--resume", help="Resume from Tweet ID.")
ap.add_argument("--videos", help="Display only Tweets with videos.", action="store_true") ap.add_argument("--videos", help="Display only Tweets with videos.", action="store_true")
ap.add_argument("--images", help="Display only Tweets with images.", action="store_true") ap.add_argument("--images", help="Display only Tweets with images.", action="store_true")
ap.add_argument("--media", help="Display Tweets with only images or videos.", action="store_true") ap.add_argument("--media",
help="Display Tweets with only images or videos.", action="store_true")
ap.add_argument("--replies", help="Display replies to a subject.", action="store_true") ap.add_argument("--replies", help="Display replies to a subject.", action="store_true")
ap.add_argument("-pc", "--pandas-clean",
help="Automatically clean Pandas dataframe at every scrape.")
ap.add_argument("-ec", "--es-count", nargs="?", default="",
help="What NOT to count: likes, replies, retweets; only for Elasticsearch.")
ap.add_argument("-pc","--pandas-clean", help="Automatically clean Pandas dataframe at every scrape.") ap.add_argument("-pc","--pandas-clean", help="Automatically clean Pandas dataframe at every scrape.")
args = ap.parse_args() args = ap.parse_args()
return args return args
def main(): def main():
""" Main
"""
args = options() args = options()
check(args) check(args)
if args.userlist: if args.userlist:
args.username = loadUserList(args.userlist, "search") args.username = loadUserList(args.userlist, "search")
if not args.pandas_type: if args.pandas_clean:
args.pandas_type = "HDF5" twint.storage.panda.clean()
if not args.index_tweets: c = initialize(args)
args.index_tweets = "twint"
if not args.index_follow: if "likes" in str(args.es_count):
args.index_follow = "twintGraph" c.ES_count["likes"] = False
if not args.index_users: if "replies" in str(args.es_count):
args.index_users = "twintUser" c.ES_count["replies"] = False
if not args.essid: if "retweets" in str(args.es_count):
args.essid = "" c.ES_count["retweets"] = False
if args.pandas_clean: if args.pandas_clean:
twint.storage.panda.clean() twint.storage.panda.clean()
......
...@@ -49,3 +49,6 @@ class Config: ...@@ -49,3 +49,6 @@ class Config:
Media = False Media = False
Replies = False Replies = False
Pandas_clean = True Pandas_clean = True
ES_count = {"likes":True,"replies":True,"retweets":True}
Lowercase = False
Pandas_au = True
...@@ -4,7 +4,16 @@ from .user import User ...@@ -4,7 +4,16 @@ from .user import User
from datetime import datetime from datetime import datetime
from .storage import db, elasticsearch, write, panda from .storage import db, elasticsearch, write, panda
follow_object = {}
tweets_object = [] tweets_object = []
user_object = []
_follow_list = []
def clean_follow_list():
    """Reset the module-level ``_follow_list`` to a new empty list.

    The global name is rebound rather than cleared in place, so any other
    reference to the previous list object keeps its contents.
    """
    global _follow_list
    _follow_list = list()
def datecheck(datestamp, config): def datecheck(datestamp, config):
if config.Since and config.Until: if config.Since and config.Until:
...@@ -21,7 +30,13 @@ def is_tweet(tw): ...@@ -21,7 +30,13 @@ def is_tweet(tw):
except: except:
return False return False
def _output(obj, output, config): def _output(obj, output, config, **extra):
if config.Lowercase:
obj.username = obj.username.lower()
for i in range(len(obj.mentions)):
obj.mentions[i] = obj.mentions[i].lower()
for i in range(len(obj.hashtags)):
obj.hashtags[i] = obj.hashtags[i].lower()
if config.Output != None: if config.Output != None:
if config.Store_csv: if config.Store_csv:
try : try :
...@@ -33,13 +48,15 @@ def _output(obj, output, config): ...@@ -33,13 +48,15 @@ def _output(obj, output, config):
else: else:
write.Text(output, config.Output) write.Text(output, config.Output)
if config.Pandas: if config.Pandas and config.User_full:
panda.update(obj, config.Essid) panda.update(obj, config)
if extra.get("follow_list"):
follow_object.username = config.Username
follow_object.action = config.Following*"following" + config.Followers*"followers"
follow_object.users = _follow_list
panda.update(follow_object, config.Essid)
if config.Elasticsearch: if config.Elasticsearch:
if config.Store_object: print("", end=".", flush=True)
tweets_object.append(obj)
else:
print(output, end=".", flush=True)
else: else:
if config.Store_object: if config.Store_object:
tweets_object.append(obj) tweets_object.append(obj)
...@@ -63,9 +80,14 @@ async def Tweets(tw, location, config, conn): ...@@ -63,9 +80,14 @@ async def Tweets(tw, location, config, conn):
if config.Elasticsearch: if config.Elasticsearch:
elasticsearch.Tweet(tweet, config) elasticsearch.Tweet(tweet, config)
if config.Store_object:
tweets_object.append(tweet) #twint.tweet.tweet
_output(tweet, output, config) _output(tweet, output, config)
async def Users(u, config, conn): async def Users(u, config, conn):
global user_object
user = User(u) user = User(u)
output = format.User(config.Format, user) output = format.User(config.Format, user)
...@@ -81,13 +103,27 @@ async def Users(u, config, conn): ...@@ -81,13 +103,27 @@ async def Users(u, config, conn):
user.join_date = _save_date user.join_date = _save_date
user.join_time = _save_time user.join_time = _save_time
if config.Store_object:
user_object.append(user) # twint.user.user
_output(user, output, config) _output(user, output, config)
async def Username(username, config, conn): async def Username(username, config, conn):
global follow_object
follow_var = config.Following*"following" + config.Followers*"followers"
if config.Database: if config.Database:
db.follow(conn, config.Username, config.Followers, username) db.follow(conn, config.Username, config.Followers, username)
if config.Elasticsearch: if config.Elasticsearch:
elasticsearch.Follow(username, config) elasticsearch.Follow(username, config)
_output(username, username, config) if config.Store_object or config.Pandas:
try:
_ = follow_object[config.Username][follow_var]
except KeyError:
follow_object.update({config.Username: {follow_var: []}})
follow_object[config.Username][follow_var].append(username)
if config.Pandas_au:
panda.update(follow_object[config.Username], config)
_output(username, username, config, follow_list=_follow_list)
...@@ -16,6 +16,9 @@ class Twint: ...@@ -16,6 +16,9 @@ class Twint:
self.d = datelock.Set(self.config.Until, self.config.Since) self.d = datelock.Set(self.config.Until, self.config.Since)
verbose.Elastic(config.Elasticsearch) verbose.Elastic(config.Elasticsearch)
if self.config.Store_object:
output.clean_follow_list()
if self.config.Pandas_clean: if self.config.Pandas_clean:
storage.panda.clean() storage.panda.clean()
...@@ -124,17 +127,35 @@ def Favorites(config): ...@@ -124,17 +127,35 @@ def Favorites(config):
run(config) run(config)
def Followers(config): def Followers(config):
output.clean_follow_list()
config.Followers = True config.Followers = True
config.Following = False
run(config) run(config)
if config.Pandas_au:
storage.panda._autoget("followers")
if config.User_full:
storage.panda._autoget("user")
storage.panda.clean()
def Following(config): def Following(config):
output.clean_follow_list()
config.Following = True config.Following = True
config.Followers = False
run(config) run(config)
if config.Pandas_au:
storage.panda._autoget("following")
if config.User_full:
storage.panda._autoget("user")
storage.panda.clean()
def Profile(config): def Profile(config):
config.Profile = True
run(config) run(config)
def Search(config): def Search(config):
config.TwitterSearch = True config.TwitterSearch = True
config.Following = False
config.Followers = False
run(config) run(config)
if config.Pandas_au:
storage.panda._autoget("tweet")
...@@ -73,6 +73,7 @@ def Tweet(Tweet, config): ...@@ -73,6 +73,7 @@ def Tweet(Tweet, config):
} }
actions.append(j_data) actions.append(j_data)
if config.ES_count["likes"] is not False:
for l in range(int(Tweet.likes)): for l in range(int(Tweet.likes)):
j_data = { j_data = {
"_index": config.Index_tweets, "_index": config.Index_tweets,
...@@ -99,6 +100,7 @@ def Tweet(Tweet, config): ...@@ -99,6 +100,7 @@ def Tweet(Tweet, config):
actions.append(j_data) actions.append(j_data)
nLikes += 1 nLikes += 1
if config.ES_count["replies"] is not False:
for rep in range(int(Tweet.replies)): for rep in range(int(Tweet.replies)):
j_data = { j_data = {
"_index": config.Index_tweets, "_index": config.Index_tweets,
...@@ -125,6 +127,7 @@ def Tweet(Tweet, config): ...@@ -125,6 +127,7 @@ def Tweet(Tweet, config):
actions.append(j_data) actions.append(j_data)
nReplies += 1 nReplies += 1
if config.ES_count["retweets"] is not False:
for ret in range(int(Tweet.retweets)): for ret in range(int(Tweet.retweets)):
j_data = { j_data = {
"_index": config.Index_tweets, "_index": config.Index_tweets,
......
from .elasticsearch import *
from time import strftime, localtime from time import strftime, localtime
import pandas as pd import pandas as pd
import warnings import warnings
from .elasticsearch import *
Tweets_df = None
Follow_df = None
User_df = None
_object_blocks = {
"tweet": [],
"user": [],
"following": [],
"followers": []
}
_type = ""
def _concat(df, type):
    """Return ``df`` extended with the rows buffered under ``type``.

    A new DataFrame is built from ``_object_blocks[type]``. When ``df`` is
    ``None`` that new frame is returned as-is; otherwise it is concatenated
    after ``df`` with ``sort=True``.
    """
    fresh = pd.DataFrame(_object_blocks[type])
    if df is None:
        return fresh
    return pd.concat([df, fresh], sort=True)
def _autoget(type):
    """Merge the buffered rows for ``type`` into the matching module DataFrame.

    ``"tweet"`` updates ``Tweets_df``, ``"followers"`` and ``"following"``
    both update ``Follow_df``, and ``"user"`` updates ``User_df``. Any other
    value leaves all three untouched.
    """
    global Tweets_df, Follow_df, User_df

    if type == "tweet":
        Tweets_df = _concat(Tweets_df, type)
    elif type in ("followers", "following"):
        Follow_df = _concat(Follow_df, type)
    elif type == "user":
        User_df = _concat(User_df, type)
_blocks = []
def update(Tweet, session): def update(object, config):
dt = f"{Tweet.datestamp} {Tweet.timestamp}" global _type
try:
_type = ((object.type == "tweet")*"tweet" +
(object.type == "user")*"user")
except AttributeError:
_type = config.Following*"following" + config.Followers*"followers"
if _type == "tweet":
dt = f"{object.datestamp} {object.timestamp}"
_data = { _data = {
"id": Tweet.id, "id": object.id,
"date": dt, "date": dt,
"timezone": Tweet.timezone, "timezone": object.timezone,
"location": Tweet.location, "location": object.location,
"tweet": Tweet.tweet, "tweet": object.tweet,
"hashtags": Tweet.hashtags, "hashtags": object.hashtags,
"user_id": Tweet.user_id, "user_id": object.user_id,
"username": Tweet.username, "username": object.username,
"link": Tweet.link, "link": object.link,
"retweet": Tweet.retweet, "retweet": object.retweet,
"user_rt": Tweet.user_rt, "user_rt": object.user_rt,
"essid": str(session), "essid": config.Essid,
'mentions': Tweet.mentions 'mentions': object.mentions
}
_object_blocks[_type].append(_data)
elif _type == "user":
_data = {
"id": object.id,
"name": object.name,
"username": object.username,
"bio": object.bio,
"location": object.location,
"url": object.url,
"join_datetime": object.join_date + " " + object.join_time,
"join_date": object.join_date,
"join_time": object.join_time,
"tweets": object.tweets,
"following": object.following,
"followers": object.followers,
"likes": object.likes,
"media": object.media_count,
"private": object.is_private,
"verified": object.is_verified,
"avatar": object.avatar,
"session": str(config.Essid)
}
_object_blocks[_type].append(_data)
elif _type == "followers" or _type == "following":
_data = {
config.Following*"following" + config.Followers*"followers" :
{config.Username: object[_type]}
} }
_blocks.append(_data) _object_blocks[_type] = _data
else:
print("Wrong type of object passed!")
def get():
df = pd.DataFrame(_blocks)
return df
def clean(): def clean():
_blocks.clear() _object_blocks["tweet"].clear()
_object_blocks["following"].clear()
_object_blocks["followers"].clear()
_object_blocks["user"].clear()
def save(_filename, _dataframe, **options): def save(_filename, _dataframe, **options):
if options.get("dataname"): if options.get("dataname"):
...@@ -41,15 +110,16 @@ def save(_filename, _dataframe, **options): ...@@ -41,15 +110,16 @@ def save(_filename, _dataframe, **options):
if not options.get("type"): if not options.get("type"):
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
_store = pd.HDFStore(_filename) _store = pd.HDFStore(_filename + ".h5")
_store[_dataname] = _dataframe _store[_dataname] = _dataframe
_store.close() _store.close()
elif options.get("type") == "Pickle": elif options.get("type") == "Pickle":
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
_dataframe.to_pickle(_filename) _dataframe.to_pickle(_filename + ".pkl")
else: else:
print("Please specify: filename, DataFrame, DataFrame name and type (HDF5, default, or Pickle") print("""Please specify: filename, DataFrame, DataFrame name and type
(HDF5, default, or Pickle)""")
def read(_filename, **options): def read(_filename, **options):
if not options.get("dataname"): if not options.get("dataname"):
...@@ -58,11 +128,12 @@ def read(_filename, **options): ...@@ -58,11 +128,12 @@ def read(_filename, **options):
_dataname = options.get("dataname") _dataname = options.get("dataname")
if not options.get("type"): if not options.get("type"):
_store = pd.HDFStore(_filename) _store = pd.HDFStore(_filename + ".h5")
df = _store[_dataname] _df = _store[_dataname]
return df return _df
elif options.get("type") == "Pickle": elif options.get("type") == "Pickle":
df = pd.read_pickle(_filename) _df = pd.read_pickle(_filename + ".pkl")
return df return _df
else: else:
print("Please specify: DataFrame, DataFrame name (twint as default), filename and type (HDF5, default, or Pickle") print("""Please specify: DataFrame, DataFrame name (twint as default),
filename and type (HDF5, default, or Pickle""")
...@@ -2,9 +2,16 @@ from time import strftime, localtime ...@@ -2,9 +2,16 @@ from time import strftime, localtime
import re import re
class tweet: class tweet:
"""Define Tweet class
"""
type = "tweet"
def __init__(self):
pass pass
def getMentions(tw): def getMentions(tw):
"""Extract ment from tweet
"""
try: try:
mentions = tw.find("div", "js-original-tweet")["data-mentions"].split(" ") mentions = tw.find("div", "js-original-tweet")["data-mentions"].split(" ")
except: except:
...@@ -13,6 +20,8 @@ def getMentions(tw): ...@@ -13,6 +20,8 @@ def getMentions(tw):
return mentions return mentions
def getText(tw): def getText(tw):
"""Replace some text
"""
text = tw.find("p", "tweet-text").text text = tw.find("p", "tweet-text").text
text = text.replace("\n", " ") text = text.replace("\n", " ")
text = text.replace("http", " http") text = text.replace("http", " http")
...@@ -33,9 +42,13 @@ def getTweet(tw, mentions): ...@@ -33,9 +42,13 @@ def getTweet(tw, mentions):
return text return text
def getHashtags(text): def getHashtags(text):
"""Get hashtags of tweet
"""
return re.findall(r'(?i)\#\w+', text, flags=re.UNICODE) return re.findall(r'(?i)\#\w+', text, flags=re.UNICODE)
def getStat(tw, _type): def getStat(tw, _type):
"""Get stats about Tweet
"""
st = f"ProfileTweet-action--{_type} u-hiddenVisually" st = f"ProfileTweet-action--{_type} u-hiddenVisually"
return tw.find("span", st).find("span")["data-tweet-stat-count"] return tw.find("span", st).find("span")["data-tweet-stat-count"]
...@@ -44,6 +57,8 @@ def getRetweet(profile, username, user): ...@@ -44,6 +57,8 @@ def getRetweet(profile, username, user):
return True return True
def getUser_rt(profile, username, user): def getUser_rt(profile, username, user):
"""Get username that retweeted
"""
if getRetweet(profile, username, user): if getRetweet(profile, username, user):
user_rt = user user_rt = user
else: else:
...@@ -52,6 +67,8 @@ def getUser_rt(profile, username, user): ...@@ -52,6 +67,8 @@ def getUser_rt(profile, username, user):
return user_rt return user_rt
def Tweet(tw, location, config): def Tweet(tw, location, config):
"""Create Tweet object
"""
t = tweet() t = tweet()
t.id = tw.find("div")["data-item-id"] t.id = tw.find("div")["data-item-id"]
t.datetime = int(tw.find("span", "_timestamp")["data-time"]) t.datetime = int(tw.find("span", "_timestamp")["data-time"])
......
class user: class user:
type = "user"
def __init__(self):
pass pass
def inf(ur, _type): def inf(ur, _type):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment