Commit 333704f0 authored by Maxim Gubin, committed by Francesco Poldi

Dynamic user agent + fixing retry bug (#271)

* Add dynamic user agent and bug fix

* Changed error message

Changed the error message to clarify that errors of this kind are not expected and that we are trying to solve them; it seems that Twitter tries to block our requests and lies about the existence of tweets.
parent b3dac333
...@@ -44,6 +44,7 @@ class Config: ...@@ -44,6 +44,7 @@ class Config:
Index_users = "twintuser" Index_users = "twintuser"
Index_type = "items" Index_type = "items"
Debug = False Debug = False
Retries_count = 10
Resume = None Resume = None
Images = False Images = False
Videos = False Videos = False
......
...@@ -5,7 +5,8 @@ import sys ...@@ -5,7 +5,8 @@ import sys
import aiohttp import aiohttp
import asyncio import asyncio
import concurrent.futures import concurrent.futures
import random
from json import loads
from aiohttp_socks import SocksConnector, SocksVer from aiohttp_socks import SocksConnector, SocksVer
from . import url from . import url
...@@ -14,7 +15,7 @@ from .user import inf ...@@ -14,7 +15,7 @@ from .user import inf
#import logging #import logging
async def RequestUrl(config, init): async def RequestUrl(config, init, headers = []):
#loggin.info("[<] " + str(datetime.now()) + ':: get+requestURL') #loggin.info("[<] " + str(datetime.now()) + ':: get+requestURL')
_connector = None _connector = None
if config.Proxy_host is not None: if config.Proxy_host is not None:
...@@ -45,17 +46,16 @@ async def RequestUrl(config, init): ...@@ -45,17 +46,16 @@ async def RequestUrl(config, init):
print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type") print("Error: Please specify --proxy-host, --proxy-port, and --proxy-type")
sys.exit(1) sys.exit(1)
if config.Profile: if config.Profile:
if config.Profile_full: if config.Profile_full:
_url = await url.MobileProfile(config.Username, init) _url = await url.MobileProfile(config.Username, init)
response = await MobileRequest(_url, connector=_connector) response = await MobileRequest(_url, connector=_connector)
else: else:
_url = await url.Profile(config.Username, init) _url = await url.Profile(config.Username, init)
response = await Request(_url, connector=_connector) response = await Request(_url, connector=_connector, headers=headers)
elif config.TwitterSearch: elif config.TwitterSearch:
_url, params = await url.Search(config, init) _url, params = await url.Search(config, init)
response = await Request(_url, params=params, connector=_connector) response = await Request(_url, params=params, connector=_connector, headers=headers)
else: else:
if config.Following: if config.Following:
_url = await url.Following(config.Username, init) _url = await url.Following(config.Username, init)
...@@ -80,21 +80,26 @@ async def MobileRequest(url, **options): ...@@ -80,21 +80,26 @@ async def MobileRequest(url, **options):
return await Response(session, url) return await Response(session, url)
async def Request(url, connector=None, params=[]): async def Request(url, connector=None, params=[], headers=[]):
#loggin.info("[<] " + str(datetime.now()) + ':: get+Request') #loggin.info("[<] " + str(datetime.now()) + ':: get+Request')
if connector: if connector:
async with aiohttp.ClientSession(connector=connector) as session: async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
return await Response(session, url, params) return await Response(session, url, params)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
return await Response(session, url, params) return await Response(session, url, params)
async def Response(session, url, params=[]): async def Response(session, url, params=[]):
#loggin.info("[<] " + str(datetime.now()) + ':: get+Response') #loggin.info("[<] " + str(datetime.now()) + ':: get+Response')
headers = {'User-Agent': 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/0.8.12' }
with timeout(30): with timeout(30):
async with session.get(url, ssl=False, headers=headers, params=params) as response: async with session.get(url, ssl=False, params=params) as response:
return await response.text() return await response.text()
async def RandomUserAgent():
    """Return a randomly chosen User-Agent string.

    Fetches the JSON document served by the fake-useragent endpoint, reads
    its 'browsers' entry, picks one of its keys at random and then one of
    the values stored under that key at random.
    (Presumably the keys are browser families and the values are lists of
    User-Agent strings -- confirm against the service's payload.)

    Returns:
        str: the selected User-Agent. If the document cannot be downloaded
        or does not have the expected shape, a fixed Lynx User-Agent is
        returned instead, so callers can still issue requests.
    """
    # Static agent used when the remote catalogue is unavailable.
    fallback = 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/0.8.12'
    url = "https://fake-useragent.herokuapp.com/browsers/0.1.8"
    try:
        r = await Request(url)
        browsers = loads(r)['browsers']
        return random.choice(browsers[random.choice(list(browsers))])
    except (aiohttp.ClientError, asyncio.TimeoutError,
            ValueError, KeyError, IndexError, TypeError):
        # This runs at start-up and inside the retry handler; a failure of
        # the third-party service must not abort the scrape.
        return fallback
async def Username(_id): async def Username(_id):
#loggin.info("[<] " + str(datetime.now()) + ':: get+Username') #loggin.info("[<] " + str(datetime.now()) + ':: get+Username')
url = f"https://twitter.com/intent/user?user_id={_id}&lang=en" url = f"https://twitter.com/intent/user?user_id={_id}&lang=en"
......
...@@ -17,6 +17,7 @@ class Twint: ...@@ -17,6 +17,7 @@ class Twint:
self.init = "-1" self.init = "-1"
self.feed = [-1] self.feed = [-1]
self.count = 0 self.count = 0
self.user_agent = ""
self.config = config self.config = config
self.conn = db.Conn(config.Database) self.conn = db.Conn(config.Database)
self.d = datelock.Set(self.config.Until, self.config.Since) self.d = datelock.Set(self.config.Until, self.config.Since)
...@@ -36,13 +37,13 @@ class Twint: ...@@ -36,13 +37,13 @@ class Twint:
async def Feed(self): async def Feed(self):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+Feed') #logging.info("[<] " + str(datetime.now()) + ':: run+Twint+Feed')
response = await get.RequestUrl(self.config, self.init)
if self.config.Debug:
print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
self.feed = []
consecutive_errors_count = 0 consecutive_errors_count = 0
while True: while True:
response = await get.RequestUrl(self.config, self.init, headers=[("User-Agent", self.user_agent)])
if self.config.Debug:
print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
self.feed = []
try: try:
if self.config.Favorites: if self.config.Favorites:
self.feed, self.init = feed.Mobile(response) self.feed, self.init = feed.Mobile(response)
...@@ -57,10 +58,14 @@ class Twint: ...@@ -57,10 +58,14 @@ class Twint:
self.feed, self.init = feed.Json(response) self.feed, self.init = feed.Json(response)
break break
except Exception as e: except Exception as e:
# Exit only we're 3 times sure it is the end of the road # Sometimes Twitter says there is no data. But it's a lie.
consecutive_errors_count += 1 consecutive_errors_count += 1
if consecutive_errors_count < 3: continue if consecutive_errors_count < self.config.Retries_count:
# Change disguise
self.user_agent = await get.RandomUserAgent()
continue
print(str(e) + " [x] run.Feed") print(str(e) + " [x] run.Feed")
print("[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!")
break break
async def follow(self): async def follow(self):
...@@ -100,6 +105,7 @@ class Twint: ...@@ -100,6 +105,7 @@ class Twint:
await output.Tweets(tweet, "", self.config, self.conn) await output.Tweets(tweet, "", self.config, self.conn)
async def main(self): async def main(self):
self.user_agent = await get.RandomUserAgent()
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main') #logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main')
if self.config.User_id is not None: if self.config.User_id is not None:
self.config.Username = await get.Username(self.config.User_id) self.config.Username = await get.Username(self.config.User_id)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment