Commit d4672ee0 authored by Francesco Poldi's avatar Francesco Poldi

Added logging options, fixes retweets

parent 700d30d6
......@@ -9,3 +9,12 @@ Copyright (c) 2018 Cody Zacharias
'''
from .config import Config
from . import run
#import logging
#logger = logging.getLogger()
#handler = logging.FileHandler('twint.log')
#formatter = logging.Formatter(
# '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
#handler.setFormatter(formatter)
#logger.addHandler(handler)
#logger.setLevel(logging.DEBUG)
\ No newline at end of file
import datetime
#import logging
class Datelock:
_until = None
_since = None
_since_def_user = None
def Set(Until, Since):
#logging.info("[<] " + str(datetime.datetime.now()) + ':: datelock+Set')
d = Datelock()
if Until:
......
from bs4 import BeautifulSoup
from re import findall
from json import loads
#import logging
#from datetime import datetime
def Follow(response):
#logging.info("[<] " + str(datetime.now()) + ':: feed+Follow')
soup = BeautifulSoup(response, "html.parser")
follow = soup.find_all("td", "info fifty screenname")
cursor = soup.find_all("div", "w-button-more")
......@@ -14,6 +17,7 @@ def Follow(response):
return follow, cursor
def Mobile(response):
#logging.info("[<] " + str(datetime.now()) + ':: feed+Mobile')
soup = BeautifulSoup(response, "html.parser")
tweets = soup.find_all("span", "metadata")
max_id = soup.find_all("div", "w-button-more")
......@@ -25,6 +29,7 @@ def Mobile(response):
return tweets, max_id
def profile(response):
#logging.info("[<] " + str(datetime.now()) + ':: feed+profile')
json_response = loads(response)
html = json_response["items_html"]
soup = BeautifulSoup(html, "html.parser")
......@@ -33,6 +38,7 @@ def profile(response):
return feed, feed[-1]["data-item-id"]
def Json(response):
#logging.info("[<] " + str(datetime.now()) + ':: feed+Json')
json_response = loads(response)
html = json_response["items_html"]
soup = BeautifulSoup(html, "html.parser")
......
#import logging
#from datetime import datetime
def Tweet(config, t):
#logging.info("[<] " + str(datetime.now()) + ':: format+Tweet')
if config.Format:
output = config.Format.replace("{id}", t.id)
output = output.replace("{date}", t.datestamp)
......@@ -35,6 +39,7 @@ def Tweet(config, t):
return output
def User(_format, u):
#logging.info("[<] " + str(datetime.now()) + ':: format+User')
if _format:
output = _format.replace("{id}", u.id)
output += output.replace("{name}", u.name)
......
from async_timeout import timeout
from datetime import datetime
from bs4 import BeautifulSoup
import sys
import aiohttp
......@@ -10,7 +11,10 @@ from aiohttp_socks import SocksConnector, SocksVer
from . import url
from .output import Tweets, Users
#import logging
async def RequestUrl(config, init):
#logging.info("[<] " + str(datetime.now()) + ':: get+requestURL')
_connector = None
if config.Proxy_host is not None:
if config.Proxy_host.lower() == "tor":
......@@ -66,6 +70,7 @@ async def RequestUrl(config, init):
return response
async def MobileRequest(url, **options):
#logging.info("[<] " + str(datetime.now()) + ':: get+MobileRequest')
ua = {'User-Agent': 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/0.8.12'}
connector = options.get("_connector")
if connector:
......@@ -75,6 +80,7 @@ async def MobileRequest(url, **options):
return await Response(session, url)
async def Request(url, **options):
#logging.info("[<] " + str(datetime.now()) + ':: get+Request')
connector = options.get("_connector")
if connector:
async with aiohttp.ClientSession(connector=connector) as session:
......@@ -83,11 +89,13 @@ async def Request(url, **options):
return await Response(session, url)
async def Response(session, url):
#loggin.info("[<] " + str(datetime.now()) + ':: get+Response')
with timeout(30):
async with session.get(url, ssl=False) as response:
return await response.text()
async def Username(_id):
#logging.info("[<] " + str(datetime.now()) + ':: get+Username')
url = f"https://twitter.com/intent/user?user_id={_id}&lang=en"
r = await Request(url)
soup = BeautifulSoup(r, "html.parser")
......@@ -95,6 +103,7 @@ async def Username(_id):
return soup.find("a", "fn url alternate-context")["href"].replace("/", "")
async def Tweet(url, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: Tweet')
try:
response = await Request(url)
soup = BeautifulSoup(response, "html.parser")
......@@ -106,6 +115,7 @@ async def Tweet(url, config, conn):
pass
async def User(url, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: get+User')
try:
response = await Request(url)
soup = BeautifulSoup(response, "html.parser")
......@@ -114,10 +124,12 @@ async def User(url, config, conn):
pass
def Limit(Limit, count):
    """Report whether the configured result limit has been reached.

    Args:
        Limit: Maximum number of results, or ``None`` for no limit. Any
            value accepted by ``int()`` works (for example ``"100"``).
        count: Number of results collected so far.

    Returns:
        ``True`` once ``count`` is at least ``int(Limit)``; ``False``
        otherwise, including when no limit is set.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: get+Limit')
    # The parameter shadows this function's own name inside the body; the
    # name is kept so existing keyword callers keep working.
    return Limit is not None and count >= int(Limit)
async def Multi(feed, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: get+Multi')
count = 0
try:
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
......
......@@ -4,6 +4,8 @@ from .user import User
from datetime import datetime
from .storage import db, elasticsearch, write, panda
#import logging
follow_object = {}
tweets_object = []
......@@ -12,10 +14,12 @@ user_object = []
_follow_list = []
def clean_follow_list():
    """Rebind the module-level ``_follow_list`` to a new empty list."""
    #logging.info("[<] " + str(datetime.now()) + ':: output+clean_follow_list')
    global _follow_list
    # Rebinding (not clearing in place) leaves any previously handed-out
    # list object untouched.
    _follow_list = list()
def datecheck(datestamp, config):
#logging.info("[<] " + str(datetime.now()) + ':: output+datecheck')
if config.Since and config.Until:
d = int(datestamp.replace("-", ""))
s = int(config.Since.replace("-", ""))
......@@ -24,6 +28,7 @@ def datecheck(datestamp, config):
return True
def is_tweet(tw):
#logging.info("[<] " + str(datetime.now()) + ':: output+is_tweet')
try:
tw.find("div")["data-item-id"]
return True
......@@ -31,6 +36,7 @@ def is_tweet(tw):
return False
def _output(obj, output, config, **extra):
#logging.info("[<] " + str(datetime.now()) + ':: output+_output')
if config.Lowercase:
obj.username = obj.username.lower()
for i in range(len(obj.mentions)):
......@@ -68,6 +74,7 @@ def _output(obj, output, config, **extra):
print("unicode error")
async def Tweets(tw, location, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: output+Tweets')
copyright = tw.find("div", "StreamItemContent--withheld")
if copyright is None and is_tweet(tw):
tweet = Tweet(tw, location, config)
......@@ -86,6 +93,7 @@ async def Tweets(tw, location, config, conn):
_output(tweet, output, config)
async def Users(u, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: output+Users')
global user_object
user = User(u)
......@@ -109,6 +117,7 @@ async def Users(u, config, conn):
_output(user, output, config)
async def Username(username, config, conn):
#logging.info("[<] " + str(datetime.now()) + ':: output+Username')
global follow_object
follow_var = config.Following*"following" + config.Followers*"followers"
......
from . import datelock, feed, get, output, verbose, storage
from asyncio import get_event_loop
from datetime import timedelta
from datetime import timedelta, datetime
from .storage import db
#import logging
class Twint:
def __init__(self, config):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+__init__')
if config.Resume is not None and config.TwitterSearch:
self.init = f"TWEET-{config.Resume}-0"
else:
......@@ -29,6 +32,7 @@ class Twint:
self.config.Timedelta = (self.d._until - self.d._since).days
async def Feed(self):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+Feed')
response = await get.RequestUrl(self.config, self.init)
if self.config.Debug:
print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
......@@ -50,6 +54,7 @@ class Twint:
pass
async def follow(self):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+follow')
await self.Feed()
if self.config.User_full:
self.count += await get.Multi(self.feed, self.config, self.conn)
......@@ -60,10 +65,12 @@ class Twint:
await output.Username(username, self.config, self.conn)
async def favorite(self):
    """Load the next feed page and add its handled-item count to the total.

    Awaits ``self.Feed()``, then adds the value returned by ``get.Multi``
    for the current feed to ``self.count``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+Twint+favorite')
    await self.Feed()
    page, settings, connection = self.feed, self.config, self.conn
    self.count += await get.Multi(page, settings, connection)
async def profile(self):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+profile')
await self.Feed()
if self.config.Profile_full:
self.count += await get.Multi(self.feed, self.config, self.conn)
......@@ -73,6 +80,7 @@ class Twint:
await output.Tweets(tweet, "", self.config, self.conn)
async def tweets(self):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+tweets')
await self.Feed()
if self.config.Location:
self.count += await get.Multi(self.feed, self.config, self.conn)
......@@ -82,6 +90,7 @@ class Twint:
await output.Tweets(tweet, "", self.config, self.conn)
async def main(self):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main')
if self.config.User_id is not None:
self.config.Username = await get.Username(self.config.User_id)
......@@ -96,6 +105,7 @@ class Twint:
self.d._until = self.d._until - _days
self.feed = [-1]
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit1')
if get.Limit(self.config.Limit, self.count):
self.d._until = self.d._until - _days
self.feed = [-1]
......@@ -113,6 +123,7 @@ class Twint:
else:
break
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2')
if get.Limit(self.config.Limit, self.count):
break
......@@ -120,13 +131,16 @@ class Twint:
verbose.Count(self.count, self.config)
def run(config):
    """Drive a ``Twint`` job built from ``config`` to completion.

    Obtains the event loop via ``get_event_loop()`` and blocks until the
    coroutine returned by ``Twint(config).main()`` has finished.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+run')
    loop = get_event_loop()
    job = Twint(config).main()
    loop.run_until_complete(job)
def Favorites(config):
    """Switch ``config`` to favorites mode and start the scrape.

    Sets ``config.Favorites`` to ``True`` and then hands ``config`` to
    ``run``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+Favorites')
    config.Favorites = True
    run(config)
def Followers(config):
#logging.info("[<] " + str(datetime.now()) + ':: run+Followers')
output.clean_follow_list()
config.Followers = True
config.Following = False
......@@ -138,6 +152,7 @@ def Followers(config):
storage.panda.clean()
def Following(config):
#logging.info("[<] " + str(datetime.now()) + ':: run+Following')
output.clean_follow_list()
config.Following = True
config.Followers = False
......@@ -149,10 +164,12 @@ def Following(config):
storage.panda.clean()
def Profile(config):
    """Switch ``config`` to profile mode and start the scrape.

    Sets ``config.Profile`` to ``True`` and then hands ``config`` to ``run``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+Profile')
    config.Profile = True
    run(config)
def Search(config):
#logging.info("[<] " + str(datetime.now()) + ':: run+Search')
config.TwitterSearch = True
config.Following = False
config.Followers = False
......
from time import strftime, localtime
import re
#from datetime import datetime
#import logging
class tweet:
"""Define Tweet class
......@@ -10,6 +12,7 @@ class tweet:
pass
def getMentions(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions')
"""Extract mentions from tweet
"""
try:
......@@ -20,6 +23,7 @@ def getMentions(tw):
return mentions
def getText(tw):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getText')
"""Replace some text
"""
text = tw.find("p", "tweet-text").text
......@@ -30,6 +34,7 @@ def getText(tw):
return text
def getTweet(tw, mentions):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTweet')
try:
text = getText(tw)
for i in range(len(mentions)):
......@@ -42,6 +47,7 @@ def getTweet(tw, mentions):
return text
def getHashtags(text):
    """Return every hashtag found in ``text``, in order of appearance.

    A hashtag is a ``#`` followed by one or more word characters; the
    leading ``#`` is kept in each returned string.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getHashtags')
    hashtag = re.compile(r'(?i)\#\w+', flags=re.UNICODE)
    return hashtag.findall(text)
......@@ -49,14 +55,17 @@ def getHashtags(text):
def getStat(tw, _type):
    """Read one stat counter of a Tweet.

    Looks up the ``span`` whose class is
    ``ProfileTweet-action--<_type> u-hiddenVisually`` inside ``tw``, takes
    the first ``span`` within it, and returns that element's
    ``data-tweet-stat-count`` value.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getStat')
    stat_class = f"ProfileTweet-action--{_type} u-hiddenVisually"
    wrapper = tw.find("span", stat_class)
    counter = wrapper.find("span")
    return counter["data-tweet-stat-count"]
def getRetweet(profile, username, user):
    """Tell whether a tweet seen in profile mode is a retweet.

    Returns ``True`` when ``profile`` is truthy and the lower-cased
    ``username`` differs from ``user``; returns ``None`` in every other
    case.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getRetweet')
    if not profile:
        return None
    if username.lower() == user:
        return None
    return True
def getUser_rt(profile, username, user):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getUser_rt')
"""Get username that retweeted
"""
if getRetweet(profile, username, user):
......@@ -69,6 +78,7 @@ def getUser_rt(profile, username, user):
def Tweet(tw, location, config):
"""Create Tweet object
"""
##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet')
t = tweet()
t.id = tw.find("div")["data-item-id"]
t.datetime = int(tw.find("span", "_timestamp")["data-time"])
......
#from datetime import datetime
#import logging
mobile = "https://mobile.twitter.com"
base = "https://twitter.com/i"
async def Favorites(username, init):
#logging.info("[<] " + str(datetime.now()) + ':: url+Favorites')
url = f"{mobile}/{username}/favorites?lang=en"
if init != -1:
......@@ -10,6 +14,7 @@ async def Favorites(username, init):
return url
async def Followers(username, init):
#logging.info("[<] " + str(datetime.now()) + ':: url+Followers')
url = f"{mobile}/{username}/followers?lang=en"
if init != -1:
......@@ -18,6 +23,7 @@ async def Followers(username, init):
return url
async def Following(username, init):
#logging.info("[<] " + str(datetime.now()) + ':: url+Following')
url = f"{mobile}/{username}/following?lang=en"
if init != -1:
......@@ -26,6 +32,7 @@ async def Following(username, init):
return url
async def MobileProfile(username, init):
#logging.info("[<] " + str(datetime.now()) + ':: url+MobileProfile')
url = f"{mobile}/{username}?lang=en"
if init != -1:
......@@ -34,6 +41,7 @@ async def MobileProfile(username, init):
return url
async def Profile(username, init):
#logging.info("[<] " + str(datetime.now()) + ':: url+Profile')
url = f"{base}/profiles/show/{username}/timeline/tweets?include_"
url += "available_features=1&lang=en&include_entities=1"
url += "&include_new_items_bar=true"
......@@ -44,6 +52,7 @@ async def Profile(username, init):
return url
async def Search(config, init):
#logging.info("[<] " + str(datetime.now()) + ':: url+Search')
url = f"{base}/search/timeline?f=tweets&vertical=default&lang=en"
url += "&include_available_features=1&include_entities=1&"
url += f"reset_error_state=false&src=typd&qf=off&max_position={init}&q="
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment