Commit 333704f0 authored by Maxim Gubin's avatar Maxim Gubin Committed by Francesco Poldi

Dynamic user agent + fixing retry bug (#271)

* Add dynamic user agent and bug fix

* Changed error message

Changed the error message to clarify that this kind of error is not expected and that we are trying to solve it. It seems that Twitter tries to block our requests and lies about the tweets' existence.
parent b3dac333
......@@ -44,6 +44,7 @@ class Config:
Index_users = "twintuser"
Index_type = "items"
Debug = False
Retries_count = 10
Resume = None
Images = False
Videos = False
......
......@@ -5,7 +5,8 @@ import sys
import aiohttp
import asyncio
import concurrent.futures
import random
from json import loads
from aiohttp_socks import SocksConnector, SocksVer
from . import url
......@@ -14,7 +15,7 @@ from .user import inf
#import logging
async def RequestUrl(config, init):
async def RequestUrl(config, init, headers = []):
#loggin.info("[<] " + str(datetime.now()) + ':: get+requestURL')
_connector = None
if config.Proxy_host is not None:
......@@ -45,17 +46,16 @@ async def RequestUrl(config, init):
print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
sys.exit(1)
if config.Profile:
if config.Profile_full:
_url = await url.MobileProfile(config.Username, init)
response = await MobileRequest(_url, connector=_connector)
else:
_url = await url.Profile(config.Username, init)
response = await Request(_url, connector=_connector)
response = await Request(_url, connector=_connector, headers=headers)
elif config.TwitterSearch:
_url, params = await url.Search(config, init)
response = await Request(_url, params=params, connector=_connector)
response = await Request(_url, params=params, connector=_connector, headers=headers)
else:
if config.Following:
_url = await url.Following(config.Username, init)
......@@ -80,21 +80,26 @@ async def MobileRequest(url, **options):
return await Response(session, url)
async def Request(url, connector=None, params=None, headers=None):
    """Open a client session, GET `url` and return the response body.

    Args:
        url: Address to request.
        connector: Optional aiohttp connector (e.g. a proxy connector).
            When falsy, the session uses aiohttp's default connector.
        params: Optional query parameters forwarded to `Response`.
        headers: Optional session-wide default headers, such as a
            User-Agent. They are applied with or without a connector.

    Returns:
        Whatever `Response(session, url, params)` returns.
    """
    #loggin.info("[<] " + str(datetime.now()) + ':: get+Request')
    # `None` defaults replace the former mutable `[]` defaults; callers that
    # omit the arguments still end up passing empty lists downstream.
    params = [] if params is None else params
    headers = [] if headers is None else headers
    # A single code path guarantees the headers reach the session even when
    # no connector is supplied (previously they were dropped in that case).
    async with aiohttp.ClientSession(connector=connector or None, headers=headers) as session:
        return await Response(session, url, params)
# Response: issue a GET for `url` on the given `session`, passing `params` as
# the query parameters and `ssl=False`, and return the body via
# `response.text()`. The request runs inside `timeout(30)`.
# NOTE(review): this listing is a diff rendered without +/- markers, so two
# variants of the `session.get(...)` line appear below. Presumably the variant
# with `headers=headers` (and the hard-coded Lynx User-Agent it uses) is the
# removed one and the variant without it is the replacement -- confirm against
# the actual file before relying on either.
async def Response(session, url, params=[]):
#loggin.info("[<] " + str(datetime.now()) + ':: get+Response')
headers = {'User-Agent': 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/0.8.12' }
with timeout(30):
async with session.get(url, ssl=False, headers=headers, params=params) as response:
async with session.get(url, ssl=False, params=params) as response:
return await response.text()
async def RandomUserAgent():
    """Return a randomly chosen User-Agent string.

    The candidates are downloaded with `Request` from the fake-useragent
    service and parsed as JSON. One key of the `browsers` object is picked
    at random, then one entry of the sequence stored under that key.
    (Presumably keys are browser families and values are lists of
    User-Agent strings -- verify against the service response.)

    Returns:
        The selected entry, or a static Lynx User-Agent string when the
        service cannot be reached or its response cannot be used, so that
        callers never fail just because the disguise could not be refreshed.
    """
    fallback = 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/0.8.12'
    # Named `endpoint` so the local does not shadow the imported `url` module.
    endpoint = "https://fake-useragent.herokuapp.com/browsers/0.1.8"
    try:
        browsers = loads(await Request(endpoint))['browsers']
        family = random.choice(list(browsers))
        return random.choice(browsers[family])
    except (aiohttp.ClientError, asyncio.TimeoutError,
            ValueError, KeyError, IndexError, TypeError):
        # Network failure, timeout, invalid JSON or an unexpected/empty
        # payload: keep working with a known-good static agent instead.
        return fallback
async def Username(_id):
#loggin.info("[<] " + str(datetime.now()) + ':: get+Username')
url = f"https://twitter.com/intent/user?user_id={_id}&lang=en"
......
......@@ -17,6 +17,7 @@ class Twint:
self.init = "-1"
self.feed = [-1]
self.count = 0
self.user_agent = ""
self.config = config
self.conn = db.Conn(config.Database)
self.d = datelock.Set(self.config.Until, self.config.Since)
......@@ -36,13 +37,13 @@ class Twint:
async def Feed(self):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+Feed')
response = await get.RequestUrl(self.config, self.init)
if self.config.Debug:
print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
self.feed = []
consecutive_errors_count = 0
while True:
response = await get.RequestUrl(self.config, self.init, headers=[("User-Agent", self.user_agent)])
if self.config.Debug:
print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
self.feed = []
try:
if self.config.Favorites:
self.feed, self.init = feed.Mobile(response)
......@@ -57,10 +58,14 @@ class Twint:
self.feed, self.init = feed.Json(response)
break
except Exception as e:
# Exit only we're 3 times sure it is the end of the road
# Sometimes Twitter says there is no data. But it's a lie.
consecutive_errors_count += 1
if consecutive_errors_count < 3: continue
if consecutive_errors_count < self.config.Retries_count:
# Change disguise
self.user_agent = await get.RandomUserAgent()
continue
print(str(e) + " [x] run.Feed")
print("[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!")
break
async def follow(self):
......@@ -100,6 +105,7 @@ class Twint:
await output.Tweets(tweet, "", self.config, self.conn)
async def main(self):
self.user_agent = await get.RandomUserAgent()
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main')
if self.config.User_id is not None:
self.config.Username = await get.Username(self.config.User_id)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment