Commit d44a6b58 authored by Cody Zacharias, committed by GitHub

Added --since

parent 52f042c2
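In short: the paging variable Min is renamed to init throughout, and a new --since option filters Tweets sent since a given date. A hypothetical invocation, following the argparse usage string in the diff below (the search term is a placeholder):

    python3 tweep.py -s "tweep" --since 2017-12-27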
@@ -9,13 +9,13 @@ import datetime
 import json
 import sys
 
-async def getUrl(Min):
-    if Min == -1:
+async def getUrl(init):
+    if init == -1:
         url = "https://twitter.com/search?f=tweets&vertical=default&lang=en&q="
     else:
         url = "https://twitter.com/i/search/timeline?f=tweets&vertical=default"
         url+= "&lang=en&include_available_features=1&include_entities=1&reset_"
-        url+= "error_state=false&src=typd&max_position={}&q=".format(Min)
+        url+= "error_state=false&src=typd&max_position={}&q=".format(init)
     if arg.u != None:
         url+= "from%3A{0.u}".format(arg)
@@ -24,6 +24,8 @@ async def getUrl(Min):
         url+= "%20{0.s}".format(arg)
     if arg.year != None:
         url+= "%20until%3A{0.year}-1-1".format(arg)
+    if arg.since != None:
+        url+= "%20since%3A{0.since}".format(arg)
     if arg.fruit:
         url+= "%20myspace.com%20OR%20last.fm%20OR"
         url+= "%20mail%20OR%20email%20OR%20gmail%20OR%20e-mail"
@@ -39,32 +41,32 @@ async def fetch(session, url):
     async with session.get(url) as response:
         return await response.text()
 
-async def getFeed(Min):
+async def getFeed(init):
     async with aiohttp.ClientSession() as session:
-        r = await fetch(session, await getUrl(Min))
+        r = await fetch(session, await getUrl(init))
         feed = []
         try:
-            if Min == -1:
+            if init == -1:
                 html = r
             else:
                 json_response = json.loads(r)
                 html = json_response["items_html"]
             soup = BeautifulSoup(html, "html.parser")
             feed = soup.find_all("li", "js-stream-item")
-            if Min == -1:
-                Min = "TWEET-{}-{}".format(feed[-1]["data-item-id"], feed[0]["data-item-id"])
+            if init == -1:
+                init = "TWEET-{}-{}".format(feed[-1]["data-item-id"], feed[0]["data-item-id"])
             else:
-                minsplit = json_response["min_position"].split("-")
-                minsplit[1] = feed[-1]["data-item-id"]
-                Min = "-".join(minsplit)
+                split = json_response["min_position"].split("-")
+                split[1] = feed[-1]["data-item-id"]
+                init = "-".join(split)
         except:
             pass
-    return feed, Min
+    return feed, init
 
-async def getTweets(Min):
-    feed, Min = await getFeed(Min)
-    for tweet in feed:
+async def getTweets(init):
+    tweets, init = await getFeed(init)
+    for tweet in tweets:
         tweetid = tweet["data-item-id"]
         datestamp = tweet.find("a", "tweet-timestamp")["title"].rpartition(" - ")[-1]
         d = datetime.datetime.strptime(datestamp, "%d %b %Y")
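For readers following the rename: init carries Twitter's max_position paging cursor, which has the shape TWEET-<oldest id>-<newest id>. A minimal standalone sketch of the cursor update above (assumptions: items are the parsed "js-stream-item" elements exposing "data-item-id" as in the diff; next_cursor is a hypothetical helper name, not part of the commit):

    # Sketch only; mirrors the cursor logic in getFeed.
    def next_cursor(first_page, min_position, items):
        if first_page:
            # Seed the cursor from the oldest and newest Tweet IDs on the page.
            return "TWEET-{}-{}".format(items[-1]["data-item-id"], items[0]["data-item-id"])
        # Later pages: keep Twitter's min_position shape, but swap in the
        # oldest Tweet ID from this batch so the next request continues past it.
        parts = min_position.split("-")
        parts[1] = items[-1]["data-item-id"]
        return "-".join(parts)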
@@ -96,25 +98,26 @@ async def getTweets(Min):
         print(output)
-    return feed, Min
+    return tweets, init
 
 async def main():
     feed = [-1]
-    Min = -1
+    init = -1
     while True:
         if len(feed) > 0:
-            feed, Min = await getTweets(Min)
+            feed, init = await getTweets(init)
         else:
             break
 
 if __name__ == "__main__":
     ap = argparse.ArgumentParser(prog="tweep.py", usage="python3 %(prog)s [options]", description="tweep.py - An Advanced Twitter Scraping Tool")
-    ap.add_argument("-u", help="User's tweets you want to scrape.")
-    ap.add_argument("-s", help="Search for tweets containing this word or phrase.")
+    ap.add_argument("-u", help="User's Tweets you want to scrape.")
+    ap.add_argument("-s", help="Search for Tweets containing this word or phrase.")
     ap.add_argument("-o", help="Save output to a file.")
-    ap.add_argument("--year", help="Filter tweets before specified year.")
-    ap.add_argument("--fruit", help="Display 'low-hanging-fruit' tweets.", action="store_true")
-    ap.add_argument("--tweets", help="Display tweets only.", action="store_true")
+    ap.add_argument("--year", help="Filter Tweets before specified year.")
+    ap.add_argument("--since", help="Filter Tweets sent since date (Example: 2017-12-27).")
+    ap.add_argument("--fruit", help="Display 'low-hanging-fruit' Tweets.", action="store_true")
+    ap.add_argument("--tweets", help="Display Tweets only.", action="store_true")
     ap.add_argument("--verified", help="Display Tweets only from verified users (Use with -s).", action="store_true")
     ap.add_argument("--users", help="Display users only (Use with -s).", action="store_true")
     arg = ap.parse_args()