Commit 92c23739, authored by andytnt and committed by Francesco Poldi

Fix missing commits (#258)

parent ba9f37eb
...@@ -17,9 +17,10 @@ PUT twintuser ...@@ -17,9 +17,10 @@ PUT twintuser
"followers": {"type": "integer"}, "followers": {"type": "integer"},
"likes": {"type": "integer"}, "likes": {"type": "integer"},
"media": {"type": "integer"}, "media": {"type": "integer"},
"private": {"type": "boolean"}, "private": {"type": "integer"},
"verified": {"type": "boolean"}, "verified": {"type": "integer"},
"avatar": {"type": "text"}, "avatar": {"type": "text"},
"background_image": {"type": "text"},
"session": {"type": "keyword"} "session": {"type": "keyword"}
} }
} }
......
...@@ -10,6 +10,7 @@ from aiohttp_socks import SocksConnector, SocksVer ...@@ -10,6 +10,7 @@ from aiohttp_socks import SocksConnector, SocksVer
from . import url from . import url
from .output import Tweets, Users from .output import Tweets, Users
from .user import inf
#import logging #import logging
...@@ -102,6 +103,14 @@ async def Username(_id): ...@@ -102,6 +103,14 @@ async def Username(_id):
return soup.find("a", "fn url alternate-context")["href"].replace("/", "") return soup.find("a", "fn url alternate-context")["href"].replace("/", "")
async def UserId(username):
    """Return the numeric id of ``username`` as an ``int``.

    Awaits ``Request`` on ``http://twitter.com/<username>?lang=en``, parses
    the response with BeautifulSoup's ``html.parser`` and converts whatever
    ``inf(soup, "id")`` yields to an integer.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: get+UserId')
    profile_markup = await Request(f"http://twitter.com/{username}?lang=en")
    parsed_profile = BeautifulSoup(profile_markup, "html.parser")
    return int(inf(parsed_profile, "id"))
async def Tweet(url, config, conn): async def Tweet(url, config, conn):
#loggin.info("[<] " + str(datetime.now()) + ':: Tweet') #loggin.info("[<] " + str(datetime.now()) + ':: Tweet')
try: try:
......
from . import format from . import format, get
from .tweet import Tweet from .tweet import Tweet
from .user import User from .user import User
from datetime import datetime from datetime import datetime
...@@ -78,13 +78,34 @@ def _output(obj, output, config, **extra): ...@@ -78,13 +78,34 @@ def _output(obj, output, config, **extra):
except UnicodeEncodeError: except UnicodeEncodeError:
print("unicode error [x] output._output") print("unicode error [x] output._output")
async def tweetUserData(tweet, config, conn):
    """Fetch the profile of every user referenced by ``tweet`` not stored yet.

    Walks ``tweet.mentions``, ``tweet.tags`` and ``tweet.replies`` in that
    order (each entry is read through its ``"id"`` and ``"screen_name"``
    keys).  A user is queued when ``db.get_user_id`` returns ``-1`` for its
    id and the id has not been queued already.  ``get.User`` is then awaited
    once per queued screen name, in first-seen order.
    """
    queued_ids = set()
    usernames = []
    for referenced_users in (tweet.mentions, tweet.tags, tweet.replies):
        for user in referenced_users:
            user_id = user["id"]
            # Cheap in-memory test first: an id that is already queued does
            # not need another database round trip.
            if user_id in queued_ids:
                continue
            if db.get_user_id(conn, user_id) == -1:
                queued_ids.add(user_id)
                usernames.append(user["screen_name"])

    for screen_name in usernames:
        url = f"http://twitter.com/{screen_name}?lang=en"
        await get.User(url, config, conn)
async def Tweets(tw, location, config, conn): async def Tweets(tw, location, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: output+Tweets') #logging.info("[<] " + str(datetime.now()) + ':: output+Tweets')
copyright = tw.find("div", "StreamItemContent--withheld") copyright = tw.find("div", "StreamItemContent--withheld")
if copyright is None and is_tweet(tw): if copyright is None and is_tweet(tw):
tweet = Tweet(tw, location, config) tweet = Tweet(tw, location, config)
if config.Database is not None and config.User_info: if config.Database is not None and config.User_info:
await tweetUserData(tweet, config, conn) await tweetUserData(tweet, config, conn)
if datecheck(tweet.datestamp, config): if datecheck(tweet.datestamp, config):
output = format.Tweet(config, tweet) output = format.Tweet(config, tweet)
...@@ -110,7 +131,7 @@ async def Users(u, config, conn): ...@@ -110,7 +131,7 @@ async def Users(u, config, conn):
output = format.User(config.Format, user) output = format.User(config.Format, user)
if config.Database: if config.Database:
db.user(conn, config.Username, config.Followers, user) db.user(conn, config, user)
if config.Elasticsearch: if config.Elasticsearch:
_save_date = user.join_date _save_date = user.join_date
......
...@@ -101,6 +101,9 @@ class Twint: ...@@ -101,6 +101,9 @@ class Twint:
if self.config.User_id is not None: if self.config.User_id is not None:
self.config.Username = await get.Username(self.config.User_id) self.config.Username = await get.Username(self.config.User_id)
if self.config.Username is not None:
self.config.User_id = await get.UserId(self.config.Username)
if self.config.TwitterSearch and self.config.Since and self.config.Until: if self.config.TwitterSearch and self.config.Since and self.config.Until:
_days = timedelta(days=int(self.config.Timedelta)) _days = timedelta(days=int(self.config.Timedelta))
while self.d._since < self.d._until: while self.d._since < self.d._until:
......
...@@ -18,76 +18,181 @@ def init(db): ...@@ -18,76 +18,181 @@ def init(db):
try: try:
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
cursor = conn.cursor() cursor = conn.cursor()
table_users = """
CREATE TABLE IF NOT EXISTS
users(
id integer not null,
id_str text not null,
name text,
username text not null,
bio text,
location text,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private integer not null,
verified integer not null,
profile_image_url text not null,
background_image text,
date_update text not null,
CONSTRAINT users_pk PRIMARY KEY (id)
);
"""
cursor.execute(table_users)
table_tweets = """ table_tweets = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
tweets ( tweets (
id integer not null, id integer not null,
user_id integer, id_str text not null,
tweet text default '',
conversation_id text not null,
created_at integer not null,
date text not null, date text not null,
time text not null, time text not null,
timezone text not null, timezone text not null,
place text default '',
location text not null, location text not null,
user text not null, replies_count integer,
tweet text not null, likes_count integer,
replies integer, retweets_count integer,
likes integer, user_id integer not null,
retweets integer, user_id_str text not null,
hashtags text, screen_name text not null,
name text default '',
profile_image_url text,
link text, link text,
retweet bool, gif_url text,
user_rt text, gif_thumb text,
mentions text, video_url text,
video_thumb text,
is_reply_to integer,
has_parent_tweet integer,
in_reply_to_screen_name text defualt '',
in_reply_to_status_id integer,
in_reply_to_status_id_str text default '',
in_reply_to_user_id integer,
in_reply_to_user_id_str text default '',
is_quote_status integer,
quote_id integer,
quote_id_str text,
quote_url text,
date_update text not null, date_update text not null,
PRIMARY KEY (id) PRIMARY KEY (id)
); );
""" """
cursor.execute(table_tweets) cursor.execute(table_tweets)
table_followers_names = """ table_retweets = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
followers_names ( retweets(
user text not null, user_id integer not null,
date_update text not null, tweet_id integer not null,
follower text not null, CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
PRIMARY KEY (user, follower) CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
); );
"""
cursor.execute(table_retweets)
table_mentions = """
CREATE TABLE IF NOT EXISTS
mentions(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT mentions_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
""" """
cursor.execute(table_followers_names) cursor.execute(table_mentions)
table_following_names = """ table_replies = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
following_names ( replies(
user text not null, tweet_id integer not null,
date_update text not null, id integer not null,
follows text not null, id_str text not null,
PRIMARY KEY (user, follows) screen_name text not null,
CONSTRAINT replies_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
); );
""" """
cursor.execute(table_following_names) cursor.execute(table_replies)
table_tags = """
CREATE TABLE IF NOT EXISTS
tags(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT tags_pk PRIMARY KEY(tweet_id, id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id),
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor.execute(table_tags)
table_hashtags = """
CREATE TABLE IF NOT EXISTS
hashtags(
tweet_id integer not null,
tag_name text not null,
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_hashtags)
table_urls = """
CREATE TABLE IF NOT EXISTS
urls(
tweet_id integer not null,
url text not null,
CONSTRAINT urls_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_urls)
table_photos = """
CREATE TABLE IF NOT EXISTS
photos(
tweet_id integer not null,
url text not null,
CONSTRAINT photos_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_photos)
table_favorites = """
CREATE TABLE IF NOT EXISTS
favorites(
user_id integer not null,
tweet_id integer not null,
CONSTRAINT favorites_pk PRIMARY KEY (user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY (user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
);
"""
cursor.execute(table_favorites)
table_followers = """ table_followers = """
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
followers ( followers (
id integer not null, id integer not null,
name text, follower_id integer not null,
username text not null, CONSTRAINT followers_pk PRIMARY KEY (id, follower_id),
bio text, CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
location, CONSTRAINT follower_id_fk FOREIGN KEY(follower_id) REFERENCES users(id)
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private text not null,
verified text not null,
avatar text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (id, username, follower)
); );
""" """
cursor.execute(table_followers) cursor.execute(table_followers)
...@@ -96,27 +201,35 @@ def init(db): ...@@ -96,27 +201,35 @@ def init(db):
CREATE TABLE IF NOT EXISTS CREATE TABLE IF NOT EXISTS
following ( following (
id integer not null, id integer not null,
name text, following_id integer not null,
username text not null, CONSTRAINT following_pk PRIMARY KEY (id, following_id),
bio text, CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
location text, CONSTRAINT following_id_fk FOREIGN KEY(following_id) REFERENCES users(id)
url text, );
join_date text not null, """
join_time text not null, cursor.execute(table_following)
tweets integer,
following integer, table_followers_names = """
followers integer, CREATE TABLE IF NOT EXISTS
likes integer, followers_names (
media integer, user text not null,
private text not null, date_update text not null,
verified text not null, follower text not null,
avatar text not null, PRIMARY KEY (user, follower)
);
"""
cursor.execute(table_followers_names)
table_following_names = """
CREATE TABLE IF NOT EXISTS
following_names (
user text not null,
date_update text not null, date_update text not null,
follows text not null, follows text not null,
PRIMARY KEY (id, username, follows) PRIMARY KEY (user, follows)
); );
""" """
cursor.execute(table_following) cursor.execute(table_following_names)
return conn return conn
except Exception as e: except Exception as e:
...@@ -150,11 +263,12 @@ def follow(conn, Username, Followers, User): ...@@ -150,11 +263,12 @@ def follow(conn, Username, Followers, User):
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
pass pass
def user(conn, Username, Followers, User): def user(conn, config, User):
try: try:
date_time = str(datetime.now()) date_time = str(datetime.now())
cursor = conn.cursor() cursor = conn.cursor()
entry = (User.id, entry = (int(User.id),
User.id,
User.name, User.name,
User.username, User.username,
User.bio, User.bio,
...@@ -170,37 +284,105 @@ def user(conn, Username, Followers, User): ...@@ -170,37 +284,105 @@ def user(conn, Username, Followers, User):
User.is_private, User.is_private,
User.is_verified, User.is_verified,
User.avatar, User.avatar,
date_time, User.background_image,
Username,) date_time)
table = uTable(Followers) query = f"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
query = f"INSERT INTO {table} VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
cursor.execute(query, entry) cursor.execute(query, entry)
if config.Followers or config.Following:
table = uTable(config.Followers)
query = f"INSERT INTO {table} VALUES(?,?)"
cursor.execute(query, (config.User_id, int(User.id)))
conn.commit() conn.commit()
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
pass pass
def get_user_id(conn, id):
    """Look up ``id`` in the ``users`` table.

    Returns the ``id`` column of the matching row, or ``-1`` when no row
    matches.
    """
    row = conn.cursor().execute(
        'SELECT id FROM users WHERE id = ? LIMIT 1', (id,)
    ).fetchone()
    if row is None:
        return -1
    return row[0]
def tweets(conn, Tweet, config):
    """Store one tweet and its related rows, then commit.

    Inserts a row into ``tweets`` followed by one row per entry of
    ``Tweet.mentions``, ``Tweet.replies``, ``Tweet.tags``, ``Tweet.hashtags``,
    ``Tweet.urls`` and ``Tweet.photos`` into the table of the same name.
    A ``favorites`` row is added when ``config.Favorites`` is truthy and a
    ``retweets`` row when ``Tweet.retweet == 1``; both pair
    ``config.User_id`` with ``Tweet.id``.

    A ``sqlite3.IntegrityError`` raised by any of the inserts is swallowed;
    in that case the remaining inserts and the commit are skipped.
    """
    try:
        stored_at = str(datetime.now())
        cursor = conn.cursor()
        entry = (
            Tweet.id,
            Tweet.id_str,
            Tweet.tweet,
            Tweet.conversation_id,
            Tweet.datetime,
            Tweet.datestamp,
            Tweet.timestamp,
            Tweet.timezone,
            Tweet.place,
            Tweet.location,
            Tweet.replies_count,
            Tweet.likes_count,
            Tweet.retweets_count,
            Tweet.user_id,
            Tweet.user_id_str,
            Tweet.username,
            Tweet.name,
            Tweet.profile_image_url,
            Tweet.link,
            Tweet.gif_url,
            Tweet.gif_thumb,
            Tweet.video_url,
            Tweet.video_thumb,
            Tweet.is_reply_to,
            Tweet.has_parent_tweet,
            Tweet.in_reply_to_screen_name,
            Tweet.in_reply_to_status_id,
            Tweet.in_reply_to_status_id_str,
            Tweet.in_reply_to_user_id,
            Tweet.in_reply_to_user_id_str,
            Tweet.is_quote_status,
            Tweet.quote_id,
            Tweet.quote_id_str,
            Tweet.quote_url,
            stored_at,
        )
        # One "?" per value of the entry tuple.
        slots = ",".join("?" * len(entry))
        cursor.execute(f'INSERT INTO tweets VALUES({slots})', entry)

        # mentions / replies / tags share one layout: the table name matches
        # the Tweet attribute, and each item is a dict with the same keys.
        for table in ("mentions", "replies", "tags"):
            people = getattr(Tweet, table)
            if len(people) > 0:
                query = f'INSERT INTO {table} VALUES(?, ?, ?, ?)'
                for person in people:
                    cursor.execute(
                        query,
                        (Tweet.id, person["id"], person["id_str"], person["screen_name"]),
                    )

        if len(Tweet.hashtags) > 0:
            query = 'INSERT OR IGNORE INTO hashtags (tweet_id, tag_name) VALUES(?,?)'
            for hashtag in Tweet.hashtags:
                cursor.execute(query, (Tweet.id, hashtag))

        # urls / photos are plain (tweet_id, value) pairs.
        for table in ("urls", "photos"):
            values = getattr(Tweet, table)
            if len(values) > 0:
                query = f'INSERT INTO {table} VALUES(?, ?)'
                for value in values:
                    cursor.execute(query, (Tweet.id, value))

        if config.Favorites:
            cursor.execute('INSERT INTO favorites VALUES(?,?)', (config.User_id, Tweet.id))

        if Tweet.retweet == 1:
            cursor.execute('INSERT INTO retweets VALUES(?,?)', (config.User_id, Tweet.id))

        conn.commit()
    except sqlite3.IntegrityError:
        pass
\ No newline at end of file
...@@ -4,10 +4,6 @@ from time import strftime, localtime ...@@ -4,10 +4,6 @@ from time import strftime, localtime
import contextlib import contextlib
import sys import sys
_index_tweet_status = False
_index_follow_status = False
_index_user_status = False
class RecycleObject(object): class RecycleObject(object):
def write(self, junk): pass def write(self, junk): pass
def flush(self): pass def flush(self): pass
...@@ -19,116 +15,6 @@ def nostdout(): ...@@ -19,116 +15,6 @@ def nostdout():
yield yield
sys.stdout = savestdout sys.stdout = savestdout
def handleIndexResponse(response):
    """Report the outcome of an index-creation response.

    Returns ``True`` straight away when ``response["status"]`` is ``400``.
    Otherwise prints whether the index was acknowledged, then prints and
    returns whether the shards were acknowledged (``True``/``False``).
    """
    try:
        status = response["status"]
    except KeyError:
        # No "status" key: fall through to the acknowledgement checks.
        status = None
    if status == 400:
        return True

    if response["acknowledged"]:
        print("[+] Index \"" + response["index"] + "\" created!")
    else:
        print("[x] error index creation :: storage.elasticsearch.handleIndexCreation")

    if not response["shards_acknowledged"]:
        print("[x] error with shards :: storage.elasticsearch.HandleIndexCreation")
        return False
    print("[+] Shards acknowledged, everything is ready to be used!")
    return True
def createIndex(config, instance, **scope):
    """Create the index selected by the ``scope`` keyword argument.

    ``scope="tweet"``, ``"follow"`` or ``"user"`` picks the mapping and the
    index name (``config.Index_tweets``, ``config.Index_follow`` or
    ``config.Index_users``).  The index is created through
    ``instance.indices.create(..., ignore=400)`` inside ``nostdout()`` and
    the response is handed to ``handleIndexResponse``, whose result is
    returned.  Any other scope prints an error and returns ``False``.
    """
    requested = scope.get("scope")

    if requested == "tweet":
        index_attr = "Index_tweets"
        properties = {
            "id": {"type": "long"},
            "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
            "timezone": {"type": "text"},
            "location": {"type": "text"},
            "hashtags": {"type": "text"},
            "tweet": {"type": "text"},
            "replies": {"type": "boolean"},
            "retweets": {"type": "boolean"},
            "likes": {"type": "boolean"},
            "user_id": {"type": "keyword"},
            "username": {"type": "keyword"},
            "day": {"type": "integer"},
            "hour": {"type": "integer"},
            "link": {"type": "text"},
            "retweet": {"type": "text"},
            "user_rt": {"type": "text"},
            "essid": {"type": "keyword"},
            "nlikes": {"type": "integer"},
            "nreplies": {"type": "integer"},
            "nretweets": {"type": "integer"},
            "search": {"type": "text"},
        }
    elif requested == "follow":
        index_attr = "Index_follow"
        properties = {
            "user": {"type": "keyword"},
            "follow": {"type": "keyword"},
            "essid": {"type": "keyword"},
        }
    elif requested == "user":
        index_attr = "Index_users"
        properties = {
            "id": {"type": "keyword"},
            "name": {"type": "keyword"},
            "username": {"type": "keyword"},
            "bio": {"type": "text"},
            "location": {"type": "keyword"},
            "url": {"type": "text"},
            "join_datetime": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
            "join_date": {"type": "date", "format": "yyyy-MM-dd"},
            "join_time": {"type": "date", "format": "HH:mm:ss"},
            "tweets": {"type": "integer"},
            "following": {"type": "integer"},
            "followers": {"type": "integer"},
            "likes": {"type": "integer"},
            "media": {"type": "integer"},
            "private": {"type": "boolean"},
            "verified": {"type": "boolean"},
            "avatar": {"type": "text"},
            "essid": {"type": "keyword"},
        }
    else:
        print("[x] error index pre-creation :: storage.elasticsearch.createIndex")
        return False

    # Every scope uses the same envelope around its own properties.
    index_body = {
        "mappings": {
            "items": {
                "properties": properties
            }
        },
        "settings": {
            "number_of_shards": 1
        }
    }
    with nostdout():
        resp = instance.indices.create(index=getattr(config, index_attr), body=index_body, ignore=400)
    return handleIndexResponse(resp)
def weekday(day): def weekday(day):
weekdays = { weekdays = {
"Monday": 1, "Monday": 1,
...@@ -146,7 +32,6 @@ def hour(datetime): ...@@ -146,7 +32,6 @@ def hour(datetime):
return strftime("%H", localtime(datetime)) return strftime("%H", localtime(datetime))
def Tweet(Tweet, config): def Tweet(Tweet, config):
global _index_tweet_status
weekdays = { weekdays = {
"Monday": 1, "Monday": 1,
"Tuesday": 2, "Tuesday": 2,
...@@ -159,6 +44,9 @@ def Tweet(Tweet, config): ...@@ -159,6 +44,9 @@ def Tweet(Tweet, config):
day = weekdays[strftime("%A", localtime(Tweet.datetime))] day = weekdays[strftime("%A", localtime(Tweet.datetime))]
actions = [] actions = []
nLikes = 1
nReplies = 1
nRetweets = 1
dt = f"{Tweet.datestamp} {Tweet.timestamp}" dt = f"{Tweet.datestamp} {Tweet.timestamp}"
...@@ -172,35 +60,182 @@ def Tweet(Tweet, config): ...@@ -172,35 +60,182 @@ def Tweet(Tweet, config):
"created_at": Tweet.datetime, "created_at": Tweet.datetime,
"date": dt, "date": dt,
"timezone": Tweet.timezone, "timezone": Tweet.timezone,
"place": Tweet.place,
"location": Tweet.location, "location": Tweet.location,
"tweet": Tweet.tweet, "tweet": Tweet.tweet,
"hashtags": Tweet.hashtags, "hashtags": Tweet.hashtags,
"user_id": Tweet.user_id, "user_id": Tweet.user_id,
"user_id_str": Tweet.user_id_str,
"username": Tweet.username, "username": Tweet.username,
"name": Tweet.name,
"profile_image_url": Tweet.profile_image_url,
"day": day, "day": day,
"hour": hour(Tweet.datetime), "hour": hour(Tweet.datetime),
"link": Tweet.link, "link": Tweet.link,
"gif_url": Tweet.gif_url,
"gif_thumb": Tweet.gif_thumb,
"video_url": Tweet.video_url,
"video_thumb": Tweet.video_thumb,
"is_reply_to": Tweet.is_reply_to,
"has_parent_tweet": Tweet.has_parent_tweet,
"retweet": Tweet.retweet, "retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
"essid": config.Essid, "essid": config.Essid,
"nlikes": int(Tweet.likes), "nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies), "nreplies": int(Tweet.replies_count),
"nretweets": int(Tweet.retweets), "nretweets": int(Tweet.retweets_count),
"is_quote_status": Tweet.is_quote_status,
"quote_id": Tweet.quote_id,
"quote_id_str": Tweet.quote_id_str,
"quote_url": Tweet.quote_url,
"search": str(config.Search) "search": str(config.Search)
} }
} }
actions.append(j_data) actions.append(j_data)
if config.ES_count["likes"]:
for l in range(int(Tweet.likes)):
j_data = {
"_index": config.Index_tweets,
"_type": config.Index_type,
"_id": str(Tweet.id) + "_like_" + str(nLikes) + config.Essid,
"_source": {
"id": str(Tweet.id),
"conversation_id": Tweet.conversation_id,
"created_at": Tweet.datetime,
"date": dt,
"timezone": Tweet.timezone,
"place": Tweet.place,
"location": Tweet.location,
"tweet": Tweet.tweet,
"hashtags": Tweet.hashtags,
"user_id": Tweet.user_id,
"user_id_str": Tweet.user_id_str,
"username": Tweet.username,
"name": Tweet.name,
"profile_image_url": Tweet.profile_image_url,
"day": day,
"hour": hour(Tweet.datetime),
"link": Tweet.link,
"gif_url": Tweet.gif_url,
"gif_thumb": Tweet.gif_thumb,
"video_url": Tweet.video_url,
"video_thumb": Tweet.video_thumb,
"is_reply_to": Tweet.is_reply_to,
"has_parent_tweet": Tweet.has_parent_tweet,
"retweet": Tweet.retweet,
"essid": config.Essid,
"nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count),
"nretweets": int(Tweet.retweets_count),
"is_quote_status": Tweet.is_quote_status,
"quote_id": Tweet.quote_id,
"quote_id_str": Tweet.quote_id_str,
"quote_url": Tweet.quote_url,
"search": str(config.Search),
"likes": True
}
}
actions.append(j_data)
nLikes += 1
if config.ES_count["replies"]:
for rep in range(int(Tweet.replies)):
j_data = {
"_index": config.Index_tweets,
"_type": config.Index_type,
"_id": str(Tweet.id) + "_reply_" + str(nReplies) + config.Essid,
"_source": {
"id": str(Tweet.id),
"conversation_id": Tweet.conversation_id,
"created_at": Tweet.datetime,
"date": dt,
"timezone": Tweet.timezone,
"place": Tweet.place,
"location": Tweet.location,
"tweet": Tweet.tweet,
"hashtags": Tweet.hashtags,
"user_id": Tweet.user_id,
"user_id_str": Tweet.user_id_str,
"username": Tweet.username,
"name": Tweet.name,
"profile_image_url": Tweet.profile_image_url,
"day": day,
"hour": hour(Tweet.datetime),
"link": Tweet.link,
"gif_url": Tweet.gif_url,
"gif_thumb": Tweet.gif_thumb,
"video_url": Tweet.video_url,
"video_thumb": Tweet.video_thumb,
"is_reply_to": Tweet.is_reply_to,
"has_parent_tweet": Tweet.has_parent_tweet,
"retweet": Tweet.retweet,
"essid": config.Essid,
"nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count),
"nretweets": int(Tweet.retweets_count),
"is_quote_status": Tweet.is_quote_status,
"quote_id": Tweet.quote_id,
"quote_id_str": Tweet.quote_id_str,
"quote_url": Tweet.quote_url,
"search": str(config.Search),
"replies": True
}
}
actions.append(j_data)
nReplies += 1
if config.ES_count["retweets"]:
for ret in range(int(Tweet.retweets)):
j_data = {
"_index": config.Index_tweets,
"_type": config.Index_type,
"_id": str(Tweet.id) + "_retweet_" + str(nRetweets) + config.Essid,
"_source": {
"id": str(Tweet.id),
"conversation_id": Tweet.conversation_id,
"created_at": Tweet.datetime,
"date": dt,
"timezone": Tweet.timezone,
"place": Tweet.place,
"location": Tweet.location,
"tweet": Tweet.tweet,
"hashtags": Tweet.hashtags,
"user_id": Tweet.user_id,
"user_id_str": Tweet.user_id_str,
"username": Tweet.username,
"name": Tweet.name,
"profile_image_url": Tweet.profile_image_url,
"day": day,
"hour": hour(Tweet.datetime),
"link": Tweet.link,
"gif_url": Tweet.gif_url,
"gif_thumb": Tweet.gif_thumb,
"video_url": Tweet.video_url,
"video_thumb": Tweet.video_thumb,
"is_reply_to": Tweet.is_reply_to,
"has_parent_tweet": Tweet.has_parent_tweet,
"retweet": Tweet.retweet,
"essid": config.Essid,
"nlikes": int(Tweet.likes_count),
"nreplies": int(Tweet.replies_count),
"nretweets": int(Tweet.retweets_count),
"is_quote_status": Tweet.is_quote_status,
"quote_id": Tweet.quote_id,
"quote_id_str": Tweet.quote_id_str,
"quote_url": Tweet.quote_url,
"search": str(config.Search),
"retweets": True
}
}
actions.append(j_data)
nRetweets += 1
es = Elasticsearch(config.Elasticsearch) es = Elasticsearch(config.Elasticsearch)
if not _index_tweet_status:
_index_tweet_status = createIndex(config, es, scope="tweet")
with nostdout(): with nostdout():
helpers.bulk(es, actions, chunk_size=2000, request_timeout=200) helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
actions = [] actions = []
def Follow(user, config): def Follow(user, config):
global _index_follow_status
actions = [] actions = []
j_data = { j_data = {
...@@ -216,14 +251,11 @@ def Follow(user, config): ...@@ -216,14 +251,11 @@ def Follow(user, config):
actions.append(j_data) actions.append(j_data)
es = Elasticsearch(config.Elasticsearch) es = Elasticsearch(config.Elasticsearch)
if not _index_follow_status:
_index_follow_status = createIndex(config, es, scope="follow")
with nostdout(): with nostdout():
helpers.bulk(es, actions, chunk_size=2000, request_timeout=200) helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
actions = [] actions = []
def UserProfile(user, config): def UserProfile(user, config):
global _index_user_status
actions = [] actions = []
j_data = { j_data = {
...@@ -248,14 +280,13 @@ def UserProfile(user, config): ...@@ -248,14 +280,13 @@ def UserProfile(user, config):
"private": user.is_private, "private": user.is_private,
"verified": user.is_verified, "verified": user.is_verified,
"avatar": user.avatar, "avatar": user.avatar,
"background_image": user.background_image,
"session": config.Essid "session": config.Essid
} }
} }
actions.append(j_data) actions.append(j_data)
es = Elasticsearch(config.Elasticsearch) es = Elasticsearch(config.Elasticsearch)
if not _index_user_status:
_index_user_status = createIndex(config, es, scope="user")
with nostdout(): with nostdout():
helpers.bulk(es, actions, chunk_size=2000, request_timeout=200) helpers.bulk(es, actions, chunk_size=2000, request_timeout=200)
actions = [] actions = []
\ No newline at end of file
from time import strftime, localtime from time import strftime, localtime
import re import json
#from datetime import datetime #from datetime import datetime
#import logging #import logging
...@@ -11,16 +11,84 @@ class tweet: ...@@ -11,16 +11,84 @@ class tweet:
def __init__(self): def __init__(self):
pass pass
def getRawURLS(tw, link, config):
    """Derive GIF/video URLs from the inline style of the media players.

    Looks at every ``div.PlayableMedia-player`` in ``tw`` and inspects the
    whitespace-separated parts of its ``style`` attribute that start with
    ``background``.  A part containing ``tweet_video_thumb`` yields the GIF
    thumbnail URL and a ``.mp4`` URL rewritten from it; any other background
    part sets the video fields to the placeholders ``"video"`` and
    ``"video_thumb"``.  ``link`` and ``config`` are not used.

    Returns ``(gif_url, gif_thumb, video_url, video_thumb)``; fields that
    were never set are empty strings.
    """
    gif_url = gif_thumb = video_url = video_thumb = ""
    css_prefix = "background-image:url('"

    for player in tw.find_all("div","PlayableMedia-player"):
        for declaration in player.attrs['style'].split():
            if not declaration.startswith('background'):
                continue
            target = declaration.replace(css_prefix, "")
            if "tweet_video_thumb" not in target:
                video_url, video_thumb = "video","video_thumb"
                continue
            # Thumbnail: the url(...) content minus the closing "')".
            gif_thumb = target.replace("')", "")
            # The .mp4 address is derived from the thumbnail address.
            gif_url = (
                gif_thumb
                .replace('.jpg','.mp4')
                .replace('https://pbs','https://video')
                .replace("_thumb", "")
            )

    return gif_url, gif_thumb, video_url, video_thumb
def getMentions(tw):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions')
    """Extract the mentioned users from a tweet.

    Returns one dict per ``a.twitter-atreply`` element with the keys ``id``
    (int), ``id_str`` (both from ``data-mentioned-user-id``) and
    ``screen_name`` (last path segment of the element's ``href``).
    """
    mentions = []
    for anchor in tw.find_all('a',{'class':'twitter-atreply'}):
        mentions.append({
            "id": int(anchor["data-mentioned-user-id"]),
            "id_str": anchor["data-mentioned-user-id"],
            "screen_name": anchor.get('href').split("/")[-1],
        })
    return mentions
def getReplies(tw):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getReplies')
    """Extract the users a tweet replies to.

    Decodes the JSON stored in the ``data-reply-to-users-json`` attribute of
    the first ``div`` and returns one dict per entry with the keys ``id``
    (int), ``id_str`` and ``screen_name``.
    """
    encoded_users = tw.find("div")["data-reply-to-users-json"]
    replies = []
    for entry in json.loads(encoded_users):
        replies.append({
            "id": int(entry["id_str"]),
            "id_str": entry["id_str"],
            "screen_name": entry["screen_name"],
        })
    return replies
def getTags(tw):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getTags')
    """Extract the users tagged in a tweet's media.

    Reads the ``a.js-user-profile-link`` elements inside
    ``div.media-tagging-block`` and returns one dict per element carrying a
    ``data-user-id`` attribute, with the keys ``id`` (int), ``id_str`` and
    ``screen_name`` (last path segment of ``href``).

    Returns an empty list when the tagging block is absent or when any tag
    cannot be parsed (best effort: a malformed tag discards the whole list).
    """
    tags = []
    try:
        tagging_block = tw.find("div","media-tagging-block")
        if tagging_block is None:
            return []
        for tag in tagging_block.find_all("a","js-user-profile-link"):
            if tag.has_attr("data-user-id"):
                tags.append({
                    "id": int(tag["data-user-id"]),
                    "id_str": tag["data-user-id"],
                    "screen_name": tag.get('href').split("/")[-1],
                })
    except (AttributeError, KeyError, TypeError, ValueError):
        # Only parsing problems are treated as "no tags"; unlike a bare
        # except, KeyboardInterrupt/SystemExit and the like still propagate.
        return []
    return tags
def getQuoteInfo(tw):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getQuoteInfo')
    """Extract information about the tweet quoted by ``tw``.

    Reads ``data-item-id`` and ``href`` from ``div.QuoteTweet-innerContainer``.

    Returns ``(quote_status, quote_id, quote_id_str, quote_url)``.  For a
    quote this is ``(1, <id as int>, <id as str>, "https://twitter.com" +
    href)``.  When the container is missing or any of its values cannot be
    read, the result is ``(0, 0, "", "")`` so that a zero status never comes
    with a half-filled id.
    """
    base_twitter = "https://twitter.com"
    no_quote = (0, 0, "", "")

    quote = tw.find("div","QuoteTweet-innerContainer")
    if quote is None:
        return no_quote

    try:
        quote_id_str = quote["data-item-id"]
        quote_id = int(quote_id_str)
        # quote.get("href") may be None, which makes the concatenation fail.
        quote_url = base_twitter + quote.get("href")
    except (KeyError, TypeError, ValueError):
        return no_quote

    return 1, quote_id, quote_id_str, quote_url
def getText(tw): def getText(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getText') #logging.info("[<] " + str(datetime.now()) + ':: tweet+getText')
...@@ -33,25 +101,6 @@ def getText(tw): ...@@ -33,25 +101,6 @@ def getText(tw):
return text return text
def getTweet(tw, mentions):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getTweet')
    """Return the tweet text with every missing ``@mention`` prepended.

    The text comes from ``getText(tw)``.  For each entry of ``mentions``,
    ``@<entry>`` is put in front of the text unless the text already contains
    it.  When ``mentions`` cannot be iterated the plain text is returned.
    """
    text = getText(tw)
    try:
        handles = [f"@{name}" for name in mentions]
    except TypeError:
        # mentions is not iterable (e.g. None): keep the text untouched.
        return text

    for handle in handles:
        if handle not in text:
            text = f"{handle} {text}"
    return text
def getHashtags(text):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getHashtags')
    """Return every ``#word`` token found in ``text``, in order of appearance."""
    hashtag_pattern = re.compile(r'(?i)\#\w+', flags=re.UNICODE)
    return hashtag_pattern.findall(text)
def getStat(tw, _type): def getStat(tw, _type):
"""Get stats about Tweet """Get stats about Tweet
""" """
...@@ -61,42 +110,49 @@ def getStat(tw, _type): ...@@ -61,42 +110,49 @@ def getStat(tw, _type):
def getRetweet(profile, username, user):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getRetweet')
    """Return ``1`` when ``profile`` is truthy and the two names differ.

    The names are compared case-insensitively.  In every other case the
    function returns ``None``.
    """
    if not profile:
        return None
    if username.lower() == user.lower():
        return None
    return 1
def getUser_rt(profile, username, user):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getUser_rt')
    """Get username that retweeted.

    Returns ``user`` when ``getRetweet(profile, username, user)`` is truthy,
    otherwise the string ``"None"``.
    """
    return user if getRetweet(profile, username, user) else "None"
def Tweet(tw, location, config): def Tweet(tw, location, config):
"""Create Tweet object """Create Tweet object
""" """
##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet') ##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet')
t = tweet() t = tweet()
t.id = tw.find("div")["data-item-id"] t.id = int(tw.find("div")["data-item-id"])
t.id_str = tw.find("div")["data-item-id"]
t.conversation_id = tw.find("div")["data-conversation-id"]
t.datetime = int(tw.find("span", "_timestamp")["data-time"]) t.datetime = int(tw.find("span", "_timestamp")["data-time"])
t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime)) t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime))
t.timestamp = strftime("%H:%M:%S", localtime(t.datetime)) t.timestamp = strftime("%H:%M:%S", localtime(t.datetime))
t.user_id = tw.find("a", "account-group js-account-group js-action-profile js-user-profile-link js-nav")["data-user-id"] t.user_id = int(tw.find("div")["data-user-id"])
t.username = tw.find("span", "username").text.replace("@", "") t.user_id_str = tw.find("div")["data-user-id"]
t.username = tw.find("div")["data-screen-name"]
t.name = tw.find("div")["data-name"]
t.profile_image_url = tw.find("img", "js-action-profile-avatar").get('src').replace("_bigger","")
t.place = tw.find("a","js-geo-pivot-link").text.strip() if tw.find("a","js-geo-pivot-link") else None
t.timezone = strftime("%Z", localtime()) t.timezone = strftime("%Z", localtime())
for img in tw.findAll("img", "Emoji Emoji--forText"): for img in tw.findAll("img", "Emoji Emoji--forText"):
img.replaceWith(img["alt"]) img.replaceWith(img["alt"])
t.mentions = getMentions(tw) t.mentions = getMentions(tw)
t.tweet = getTweet(tw, t.mentions) t.tags = getTags(tw)
t.replies = getReplies(tw)
t.urls = [link.attrs["data-expanded-url"] for link in tw.find_all('a',{'class':'twitter-timeline-link'}) if link.has_attr("data-expanded-url")]
t.photos = [photo_node.attrs['data-image-url'] for photo_node in tw.find_all("div", "AdaptiveMedia-photoContainer")]
t.tweet = getText(tw)
t.location = location t.location = location
t.hashtags = getHashtags(t.tweet) t.hashtags = [hashtag.text for hashtag in tw.find_all("a","twitter-hashtag")]
t.replies = getStat(tw, "reply") t.replies_count = getStat(tw, "reply")
t.retweets = getStat(tw, "retweet") t.retweets_count = getStat(tw, "retweet")
t.likes = getStat(tw, "favorite") t.likes_count = getStat(tw, "favorite")
t.link = f"https://twitter.com/{t.username}/status/{t.id}" t.link = f"https://twitter.com/{t.username}/status/{t.id}"
t.retweet = getRetweet(config.Profile, t.username, config.Username) t.retweet = getRetweet(config.Profile, t.username, config.Username)
t.user_rt = getUser_rt(config.Profile, t.username, config.Username) t.gif_url, t.gif_thumb, t.video_url, t.video_thumb = getRawURLS(tw, t.link, config)
t.is_quote_status, t.quote_id, t.quote_id_str, t.quote_url = getQuoteInfo(tw)
t.is_reply_to = int(bool(tw.find("div")["data-is-reply-to"])) if tw.find("div").has_attr("data-is-reply-to") else 0
t.has_parent_tweet = int(bool(tw.find("div")["data-has-parent-tweet"])) if tw.find("div").has_attr("data-has-parent-tweet") else 0
t.in_reply_to_screen_name = ""
t.in_reply_to_status_id = 0
t.in_reply_to_status_id_str = ""
t.in_reply_to_user_id = 0
t.in_reply_to_user_id_str = ""
return t return t
\ No newline at end of file
...@@ -20,6 +20,10 @@ def inf(ur, _type): ...@@ -20,6 +20,10 @@ def inf(ur, _type):
ret = group["data-screen-name"] ret = group["data-screen-name"]
elif _type == "private": elif _type == "private":
ret = group["data-protected"] ret = group["data-protected"]
if ret == 'true':
ret = 1
else:
ret = 0
return ret return ret
...@@ -28,18 +32,18 @@ def card(ur, _type): ...@@ -28,18 +32,18 @@ def card(ur, _type):
try: try:
ret = ur.find("p", "ProfileHeaderCard-bio u-dir").text.replace("\n", " ") ret = ur.find("p", "ProfileHeaderCard-bio u-dir").text.replace("\n", " ")
except: except:
ret = "None" ret = None
elif _type == "location": elif _type == "location":
try: try:
ret = ur.find("span", "ProfileHeaderCard-locationText u-dir").text ret = ur.find("span", "ProfileHeaderCard-locationText u-dir").text
ret = ret[15:].replace("\n", " ")[:-10] ret = ret[15:].replace("\n", " ")[:-10]
except: except:
ret = "None" ret = None
elif _type == "url": elif _type == "url":
try: try:
ret = ur.find("span", "ProfileHeaderCard-urlText u-dir").find("a")["title"] ret = ur.find("span", "ProfileHeaderCard-urlText u-dir").find("a")["title"]
except: except:
ret = "None" ret = None
return ret return ret
...@@ -54,6 +58,8 @@ def convertToInt(x): ...@@ -54,6 +58,8 @@ def convertToInt(x):
"b" : 1000000000, "b" : 1000000000,
} }
try : try :
if ',' in x:
x = x.replace(',', '')
y = int(x) y = int(x)
return y return y
except : except :
...@@ -79,11 +85,10 @@ def stat(ur, _type): ...@@ -79,11 +85,10 @@ def stat(ur, _type):
def media(ur): def media(ur):
try: try:
media_count = ur.find("a", "PhotoRail-headingWithCount js-nav").text media_count = ur.find("a", "PhotoRail-headingWithCount js-nav").text.strip().split(" ")[0]
media_count = media_count.replace("\n", "")[32:].split(" ")[0]
media_count = convertToInt(media_count) media_count = convertToInt(media_count)
except: except:
media_count = "0" media_count = 0
return media_count return media_count
...@@ -91,11 +96,11 @@ def verified(ur): ...@@ -91,11 +96,11 @@ def verified(ur):
try: try:
is_verified = ur.find("span", "ProfileHeaderCard-badges").text is_verified = ur.find("span", "ProfileHeaderCard-badges").text
if "Verified account" in is_verified: if "Verified account" in is_verified:
is_verified = "true" is_verified = 1
else: else:
is_verified = "false" is_verified = 0
except: except:
is_verified = "false" is_verified = 0
return is_verified return is_verified
...@@ -119,4 +124,5 @@ def User(ur): ...@@ -119,4 +124,5 @@ def User(ur):
u.is_private = inf(ur, "private") u.is_private = inf(ur, "private")
u.is_verified = verified(ur) u.is_verified = verified(ur)
u.avatar = ur.find("img", "ProfileAvatar-image")["src"] u.avatar = ur.find("img", "ProfileAvatar-image")["src"]
u.background_image = ur.find('div',{'class':'ProfileCanopy-headerBg'}).find('img').get('src')
return u return u
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment