Commit d44a6b58 authored by Cody Zacharias's avatar Cody Zacharias Committed by GitHub

Added --since

parent 52f042c2
...@@ -9,13 +9,13 @@ import datetime ...@@ -9,13 +9,13 @@ import datetime
import json import json
import sys import sys
async def getUrl(Min): async def getUrl(init):
if Min == -1: if init == -1:
url = "https://twitter.com/search?f=tweets&vertical=default&lang=en&q=" url = "https://twitter.com/search?f=tweets&vertical=default&lang=en&q="
else: else:
url = "https://twitter.com/i/search/timeline?f=tweets&vertical=default" url = "https://twitter.com/i/search/timeline?f=tweets&vertical=default"
url+= "&lang=en&include_available_features=1&include_entities=1&reset_" url+= "&lang=en&include_available_features=1&include_entities=1&reset_"
url+= "error_state=false&src=typd&max_position={}&q=".format(Min) url+= "error_state=false&src=typd&max_position={}&q=".format(init)
if arg.u != None: if arg.u != None:
url+= "from%3A{0.u}".format(arg) url+= "from%3A{0.u}".format(arg)
...@@ -24,6 +24,8 @@ async def getUrl(Min): ...@@ -24,6 +24,8 @@ async def getUrl(Min):
url+= "%20{0.s}".format(arg) url+= "%20{0.s}".format(arg)
if arg.year != None: if arg.year != None:
url+= "%20until%3A{0.year}-1-1".format(arg) url+= "%20until%3A{0.year}-1-1".format(arg)
if arg.since != None:
url+= "%20since%3A{0.since}".format(arg)
if arg.fruit: if arg.fruit:
url+= "%20myspace.com%20OR%20last.fm%20OR" url+= "%20myspace.com%20OR%20last.fm%20OR"
url+= "%20mail%20OR%20email%20OR%20gmail%20OR%20e-mail" url+= "%20mail%20OR%20email%20OR%20gmail%20OR%20e-mail"
...@@ -39,32 +41,32 @@ async def fetch(session, url): ...@@ -39,32 +41,32 @@ async def fetch(session, url):
async with session.get(url) as response: async with session.get(url) as response:
return await response.text() return await response.text()
async def getFeed(init):
    """Fetch one page of Twitter search results and compute the next cursor.

    Args:
        init: -1 for the initial HTML search page, otherwise the
            ``max_position`` cursor string returned by a previous call.

    Returns:
        Tuple of (feed, init): ``feed`` is a list of BeautifulSoup
        ``li.js-stream-item`` tweet nodes (empty when parsing fails or no
        results remain), and ``init`` is the cursor for the next page.
    """
    async with aiohttp.ClientSession() as session:
        r = await fetch(session, await getUrl(init))
    feed = []
    try:
        if init == -1:
            # First request returns a full HTML page.
            html = r
        else:
            # Subsequent requests return JSON with the items embedded.
            json_response = json.loads(r)
            html = json_response["items_html"]
        soup = BeautifulSoup(html, "html.parser")
        feed = soup.find_all("li", "js-stream-item")
        if init == -1:
            # Seed the cursor from the oldest and newest tweet ids on the page.
            init = "TWEET-{}-{}".format(feed[-1]["data-item-id"], feed[0]["data-item-id"])
        else:
            # Advance the server-provided cursor to the oldest tweet seen.
            split = json_response["min_position"].split("-")
            split[1] = feed[-1]["data-item-id"]
            init = "-".join(split)
    except (ValueError, KeyError, IndexError):
        # Best-effort: malformed JSON (ValueError covers JSONDecodeError),
        # a missing key, or an empty feed (feed[-1]) simply ends pagination —
        # the caller treats an empty feed as "done". The original bare
        # `except:` also trapped SystemExit/KeyboardInterrupt; this does not.
        pass
    return feed, init
async def getTweets(Min): async def getTweets(init):
feed, Min = await getFeed(Min) tweets, init = await getFeed(init)
for tweet in feed: for tweet in tweets:
tweetid = tweet["data-item-id"] tweetid = tweet["data-item-id"]
datestamp = tweet.find("a", "tweet-timestamp")["title"].rpartition(" - ")[-1] datestamp = tweet.find("a", "tweet-timestamp")["title"].rpartition(" - ")[-1]
d = datetime.datetime.strptime(datestamp, "%d %b %Y") d = datetime.datetime.strptime(datestamp, "%d %b %Y")
...@@ -96,25 +98,26 @@ async def getTweets(Min): ...@@ -96,25 +98,26 @@ async def getTweets(Min):
print(output) print(output)
return feed, Min return tweets, init
async def main():
    """Page through search results until a fetch returns no tweets."""
    # Seed with a non-empty sentinel list and the initial cursor (-1).
    feed, init = [-1], -1
    # An empty feed means the previous page was the last one.
    while feed:
        feed, init = await getTweets(init)
if __name__ == "__main__": if __name__ == "__main__":
ap = argparse.ArgumentParser(prog="tweep.py", usage="python3 %(prog)s [options]", description="tweep.py - An Advanced Twitter Scraping Tool") ap = argparse.ArgumentParser(prog="tweep.py", usage="python3 %(prog)s [options]", description="tweep.py - An Advanced Twitter Scraping Tool")
ap.add_argument("-u", help="User's tweets you want to scrape.") ap.add_argument("-u", help="User's Tweets you want to scrape.")
ap.add_argument("-s", help="Search for tweets containing this word or phrase.") ap.add_argument("-s", help="Search for Tweets containing this word or phrase.")
ap.add_argument("-o", help="Save output to a file.") ap.add_argument("-o", help="Save output to a file.")
ap.add_argument("--year", help="Filter tweets before specified year.") ap.add_argument("--year", help="Filter Tweets before specified year.")
ap.add_argument("--fruit", help="Display 'low-hanging-fruit' tweets.", action="store_true") ap.add_argument("--since", help="Filter Tweets sent since date (Example: 2017-12-27).")
ap.add_argument("--tweets", help="Display tweets only.", action="store_true") ap.add_argument("--fruit", help="Display 'low-hanging-fruit' Tweets.", action="store_true")
ap.add_argument("--tweets", help="Display Tweets only.", action="store_true")
ap.add_argument("--verified", help="Display Tweets only from verified users (Use with -s).", action="store_true") ap.add_argument("--verified", help="Display Tweets only from verified users (Use with -s).", action="store_true")
ap.add_argument("--users", help="Display users only (Use with -s).", action="store_true") ap.add_argument("--users", help="Display users only (Use with -s).", action="store_true")
arg = ap.parse_args() arg = ap.parse_args()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment