Commit 226880cb authored by Cody Zacharias, committed by GitHub

Update search.py

parent ea0e1ec7
from . import datelock, db, get, feed, output from . import datelock, db, get, feed, output
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import aiohttp
import asyncio import asyncio
import concurrent.futures import concurrent.futures
import datetime import datetime
...@@ -8,101 +7,90 @@ import re ...@@ -8,101 +7,90 @@ import re
import sys import sys
class Search:
    """Drive a paginated search scrape and emit every Tweet it yields.

    Constructing the object performs the whole run: ``__init__`` prepares
    state from ``config`` and then blocks on :meth:`main` until the scrape
    is finished.

    Attributes set by ``__init__``:
        init:   cursor handed to ``get.Url``; starts at ``-1`` and is
                replaced by whatever ``feed.Json`` returns for each page.
        feed:   items of the most recently fetched page. It starts as the
                non-empty sentinel ``[-1]`` so the first loop iteration in
                :meth:`main` always fetches a page.
        count:  number of Tweets handled so far.
        config: the caller-supplied configuration object.
        d:      object returned by ``datelock.Set(config)``; its ``_since``
                and ``_until`` attributes bound the date window.
        conn:   result of ``db.init`` when ``config.Database`` is set,
                otherwise the empty string.
    """

    def __init__(self, config):
        """Initialise state from ``config`` and run the scrape to completion.

        Args:
            config: configuration object. This method reads its
                ``Elasticsearch``, ``Database`` and ``Timedelta`` attributes;
                :meth:`main` and :meth:`tweets` read further ones.

        Raises:
            SystemExit: with status 1 when ``db.init`` returns a string,
                which is treated as an error description.
        """
        self.init = -1
        self.feed = [-1]
        self.count = 0
        self.config = config
        self.d = datelock.Set(self.config)

        if self.config.Elasticsearch:
            print("[+] Indexing to Elasticsearch @ " + str(self.config.Elasticsearch))

        if self.config.Database:
            print("[+] Inserting into Database: " + str(self.config.Database))
            self.conn = db.init(self.config.Database)
            if isinstance(self.conn, str):
                # A string result signals failure; show that message (the
                # previous code printed the builtin ``str`` type instead).
                print(self.conn)
                sys.exit(1)
        else:
            self.conn = ""

        if not self.config.Timedelta:
            # Default window: the whole since/until span, capped at 30 days.
            span_days = (self.d._until - self.d._since).days
            self.config.Timedelta = min(span_days, 30)

        loop = asyncio.get_event_loop()
        loop.run_until_complete(self.main())

    def _limit_reached(self):
        """Return True once ``config.Limit`` is set and ``count`` has met it."""
        limit = self.config.Limit
        return limit is not None and self.count >= int(limit)

    async def Feed(self):
        """Fetch the next page and store its items in ``self.feed``.

        On success ``self.feed`` and ``self.init`` are replaced by the pair
        returned from ``feed.Json``. If parsing fails, ``self.feed`` is left
        empty, which is the condition :meth:`main` uses to stop or to move
        to the next date window.
        """
        url = await get.Url(self.config, self.init).search()
        response = await get.Request(self.config, url)
        self.feed = []
        try:
            self.feed, self.init = feed.Json(response)
        except Exception:
            # Best-effort: an unparsable response is treated as "no more
            # results". Only Exception is caught so that KeyboardInterrupt
            # and SystemExit still propagate.
            pass

    async def tweets(self):
        """Fetch one page and process each item in it.

        With ``config.Location`` set, each item's permalink is resolved to a
        full URL and passed to ``get.Tweet``; otherwise the item is passed
        straight to ``output.Tweets``. ``self.count`` is incremented once
        per item in both cases.
        """
        await self.Feed()
        if self.config.Location:
            try:
                for tweet in self.feed:
                    self.count += 1
                    link = tweet.find("a", "tweet-timestamp js-permalink js-nav js-tooltip")["href"]
                    url = "https://twitter.com{}".format(link)
                    # The previous code awaited this call and then passed its
                    # *result* to loop.run_in_executor(), which requires a
                    # callable. The work therefore already ran here, one
                    # Tweet at a time; awaiting directly keeps that effect
                    # without the invalid executor hand-off.
                    # NOTE(review): assumes get.Tweet does not return a
                    # callable that was meant to run in a thread - confirm.
                    await get.Tweet(url, self.config, self.conn)
            except Exception:
                # Best-effort: a malformed item abandons the rest of this
                # page instead of aborting the whole scrape.
                pass
        else:
            for tweet in self.feed:
                self.count += 1
                await output.Tweets(tweet, "", self.config, self.conn)

    async def main(self):
        """Run the scrape loop until results, date range or limit run out.

        When both ``config.Since`` and ``config.Until`` are set, the search
        walks backwards through the date range in windows of
        ``config.Timedelta`` days. Otherwise pages are fetched until one
        comes back empty or the limit is reached. A summary line is printed
        at the end when ``config.Count`` is truthy.
        """
        if self.config.User_id is not None:
            self.config.Username = await get.Username(self.config)

        if self.config.Since and self.config.Until:
            while self.d._since < self.d._until:
                step = datetime.timedelta(days=int(self.config.Timedelta))
                self.config.Since = str(self.d._until - step)
                self.config.Until = str(self.d._until)
                if len(self.feed) > 0:
                    await self.tweets()
                else:
                    # Current window exhausted: slide it back and re-arm the
                    # non-empty sentinel so the next window gets fetched.
                    self.d._until = self.d._until - step
                    self.feed = [-1]
                if self._limit_reached():
                    # NOTE(review): this only moves the window back, so
                    # tweets() is still called on later iterations until the
                    # window passes _since - confirm whether a break was
                    # intended here.
                    self.d._until = self.d._until - step
                    self.feed = [-1]
        else:
            while True:
                if len(self.feed) > 0:
                    await self.tweets()
                else:
                    break
                if self._limit_reached():
                    break

        if self.config.Count:
            print("[+] Finished: Successfully collected {0.count} Tweets.".format(self))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment