Commit 65709919 authored by Aaron Gonzales's avatar Aaron Gonzales Committed by GitHub

Dual linear + polynomial timer for #604 (#726)

* [FIX]: fix request error bug limiting requests

* [FEAT]: add dual poly. backoff + linear timer if request limit hit

Add a dual polynomial + linear timer to allow for optimal fine-grained
searching of Twitter's timeout value. A new arg `min_wait_time` allows
users to do one of three things. First, if they already know a value
that is optimal, they can input it directly, allowing the timer to
be strictly a linear timer. Second, if Twitter suddenly changes its
timeout limit and the value no longer satisfies the required amount,
the timer will choose the higher of the polynomial timer's and the
linear timer's next values, so the application still functions. Third,
as the linear timer's `min_wait_time` arg goes to zero, the timer
becomes strictly a polynomial backoff timer, which gives the user
versatility and is less likely to explode in the case of a change in
Twitter's request limit timeout.

* [FIX]: add args to config.py

* [REFACT]: minor changes to run.py

* [REFACT]: small change to arg case
parent 6d980f1e
......@@ -46,6 +46,10 @@ def check(args):
error("Error", "Please specify an output file (Example: -o file.csv).")
elif args.json:
error("Error", "Please specify an output file (Example: -o file.json).")
if args.backoff_exponent <= 0:
error("Error", "Please specifiy a positive value for backoff_exponent")
if args.min_wait_time < 0:
error("Error", "Please specifiy a non negative value for min_wait_time")
def loadUserList(ul, _type):
""" Concatenate users
......@@ -61,7 +65,6 @@ def loadUserList(ul, _type):
return un[15:]
return userlist
def initialize(args):
""" Set default values for config from args
"""
......@@ -124,6 +127,8 @@ def initialize(args):
c.Filter_retweets = args.filter_retweets
c.Translate = args.translate
c.TranslateDest = args.translate_dest
c.Backoff_exponent = args.backoff_exponent
c.Min_wait_time = args.min_wait_time
return c
def options():
......@@ -220,8 +225,10 @@ def options():
ap.add_argument("--source", help="Filter the tweets for specific source client.")
ap.add_argument("--members-list", help="Filter the tweets sent by users in a given list.")
ap.add_argument("-fr", "--filter-retweets", help="Exclude retweets from the results.", action="store_true")
ap.add_argument("--backoff-exponent", help="Specify a exponent for the polynomial backoff in case of errors.", type=float, default=3.0)
ap.add_argument("--min-wait-time", type=float, default=15, help="specifiy a minimum wait time in case of scraping limit error. This value will be adjusted by twint if the value provided does not satisfy the limits constraints")
args = ap.parse_args()
return args
def main():
......
......@@ -75,3 +75,5 @@ class Config:
Translate = False
TranslateSrc = "en"
TranslateDest = "en"
Backoff_exponent = 3.0
Min_wait_time = 0
import sys, os, time
from asyncio import get_event_loop, TimeoutError, ensure_future, new_event_loop, set_event_loop
from datetime import datetime
from . import datelock, feed, get, output, verbose, storage
from .storage import db
#from . import _logme
#
#logme = _logme._logger(__name__)
import logging as logme
......@@ -54,11 +50,11 @@ class Twint:
try:
if self.config.Favorites:
self.feed, self.init = feed.Mobile(response)
if not self.count%40:
if not self.count % 40:
time.sleep(5)
elif self.config.Followers or self.config.Following:
self.feed, self.init = feed.Follow(response)
if not self.count%40:
if not self.count % 40:
time.sleep(5)
elif self.config.Profile:
if self.config.Profile_full:
......@@ -91,11 +87,20 @@ class Twint:
# Sometimes Twitter says there is no data. But it's a lie.
consecutive_errors_count += 1
if consecutive_errors_count < self.config.Retries_count:
self.user_agent = await get.RandomUserAgent()
# skip to the next iteration if wait time does not satisfy limit constraints
delay = round(consecutive_errors_count ** self.config.Backoff_exponent, 1)
# if the delay is less than users set min wait time then replace delay
if self.config.Min_wait_time > delay:
delay = self.config.Min_wait_time
sys.stderr.write('sleeping for {} secs\n'.format(delay))
time.sleep(delay)
self.user_agent = await get.RandomUserAgent(wa=True)
continue
logme.critical(__name__+':Twint:Feed:Tweets_known_error:' + str(e))
print(str(e) + " [x] run.Feed")
print("[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!")
sys.stderr.write(str(e) + " [x] run.Feed")
sys.stderr.write("[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!")
break
if self.config.Resume:
print(self.init, file=open(self.config.Resume, "a", encoding="utf-8"))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment