Commit b94c64d8 authored by Francesco Poldi

Cleaned run.Feed, added check has_more_items

parent 48cef6e9
...@@ -21,6 +21,8 @@ class Twint: ...@@ -21,6 +21,8 @@ class Twint:
self.feed = [-1] self.feed = [-1]
self.count = 0 self.count = 0
self.consecutive_errors_count = 0
self.has_more_items = True
self.user_agent = "" self.user_agent = ""
self.config = config self.config = config
self.conn = db.Conn(config.Database) self.conn = db.Conn(config.Database)
...@@ -44,12 +46,11 @@ class Twint: ...@@ -44,12 +46,11 @@ class Twint:
async def Feed(self): async def Feed(self):
logme.debug(__name__+':Twint:Feed') logme.debug(__name__+':Twint:Feed')
consecutive_errors_count = 0
while True:
response = await get.RequestUrl(self.config, self.init, headers=[("User-Agent", self.user_agent)]) response = await get.RequestUrl(self.config, self.init, headers=[("User-Agent", self.user_agent)])
if self.config.Debug: if self.config.Debug:
print(response, file=open("twint-last-request.log", "w", encoding="utf-8")) print(response, file=open("twint-last-request.log", "w", encoding="utf-8"))
if self.config.Resume: if self.config.Resume:
print(self.init, file=open(self.config.Resume, "w", encoding="utf-8")) print(self.init, file=open(self.config.Resume, "w", encoding="utf-8"))
...@@ -69,8 +70,11 @@ class Twint: ...@@ -69,8 +70,11 @@ class Twint:
else: else:
self.feed, self.init = feed.profile(response) self.feed, self.init = feed.profile(response)
elif self.config.TwitterSearch: elif self.config.TwitterSearch:
self.feed, self.init = feed.Json(response) self.feed, self.init, _has_more_items = feed.Json(response)
break if (not self.feed) and self.has_more_items:
await self.Feed()
self.has_more_items = _has_more_items
return
except TimeoutError as e: except TimeoutError as e:
if self.config.Proxy_host.lower() == "tor": if self.config.Proxy_host.lower() == "tor":
print("[?] Timed out, changing Tor identity...") print("[?] Timed out, changing Tor identity...")
...@@ -78,30 +82,30 @@ class Twint: ...@@ -78,30 +82,30 @@ class Twint:
logme.critical(__name__+':Twint:Feed:tor-password') logme.critical(__name__+':Twint:Feed:tor-password')
sys.stderr.write("Error: config.Tor_control_password must be set for proxy autorotation!\r\n") sys.stderr.write("Error: config.Tor_control_password must be set for proxy autorotation!\r\n")
sys.stderr.write("Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors-controller-interface-directly\r\n") sys.stderr.write("Info: What is it? See https://stem.torproject.org/faq.html#can-i-interact-with-tors-controller-interface-directly\r\n")
break exit(1)
else: else:
get.ForceNewTorIdentity(self.config) get.ForceNewTorIdentity(self.config)
continue await self.Feed()
else: else:
logme.critical(__name__+':Twint:Feed:' + str(e)) logme.critical(__name__+':Twint:Feed:' + str(e))
print(str(e)) exit(str(e))
break
except Exception as e: except Exception as e:
if self.config.Profile or self.config.Favorites: if self.config.Profile or self.config.Favorites:
print("[!] Twitter does not return more data, scrape stops here.") exit("[!] Twitter does not return more data, scrape stops here.")
break
logme.critical(__name__+':Twint:Feed:noData' + str(e)) logme.critical(__name__+':Twint:Feed:noData' + str(e))
# Sometimes Twitter says there is no data. But it's a lie. # Sometimes Twitter says there is no data. But it's a lie.
consecutive_errors_count += 1 self.consecutive_errors_count += 1
if consecutive_errors_count < self.config.Retries_count: if self.consecutive_errors_count < self.config.Retries_count:
self.user_agent = await get.RandomUserAgent() self.user_agent = await get.RandomUserAgent()
continue time.sleep(5)
await self.Feed()
logme.critical(__name__+':Twint:Feed:Tweets_known_error:' + str(e)) logme.critical(__name__+':Twint:Feed:Tweets_known_error:' + str(e))
print(str(e) + " [x] run.Feed") exit(str(e) + " [x] run.Feed\n"+
print("[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!") "[!] if get this error but you know for sure that more tweets exist, please open an issue and we will investigate it!")
break
async def follow(self): async def follow(self):
self.consecutive_errors_count = 0
await self.Feed() await self.Feed()
if self.config.User_full: if self.config.User_full:
logme.debug(__name__+':Twint:follow:userFull') logme.debug(__name__+':Twint:follow:userFull')
...@@ -114,11 +118,13 @@ class Twint: ...@@ -114,11 +118,13 @@ class Twint:
await output.Username(username, self.config, self.conn) await output.Username(username, self.config, self.conn)
async def favorite(self): async def favorite(self):
self.consecutive_errors_count = 0
logme.debug(__name__+':Twint:favorite') logme.debug(__name__+':Twint:favorite')
await self.Feed() await self.Feed()
self.count += await get.Multi(self.feed, self.config, self.conn) self.count += await get.Multi(self.feed, self.config, self.conn)
async def profile(self): async def profile(self):
self.consecutive_errors_count = 0
await self.Feed() await self.Feed()
if self.config.Profile_full: if self.config.Profile_full:
logme.debug(__name__+':Twint:profileFull') logme.debug(__name__+':Twint:profileFull')
...@@ -130,6 +136,7 @@ class Twint: ...@@ -130,6 +136,7 @@ class Twint:
await output.Tweets(tweet, self.config, self.conn) await output.Tweets(tweet, self.config, self.conn)
async def tweets(self): async def tweets(self):
self.consecutive_errors_count = 0
await self.Feed() await self.Feed()
if self.config.Location: if self.config.Location:
logme.debug(__name__+':Twint:tweets:location') logme.debug(__name__+':Twint:tweets:location')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment