Commit 7fb4d83b authored by Francesco Poldi

Fixes stop scraping

Fixes #48 and #60
parent c32948c8
...@@ -413,6 +413,16 @@ async def main(): ...@@ -413,6 +413,16 @@ async def main():
Putting it all together. Putting it all together.
''' '''
if arg.until:
_until = datetime.datetime.strptime(arg.until, "%Y-%m-%d").date()
else:
_until = datetime.date.today()
if arg.since:
_since = datetime.datetime.strptime(arg.since, "%Y-%m-%d").date()
else:
_since = datetime.datetime.strptime("2006-03-21", "%Y-%m-%d").date() # the 1st tweet
if arg.elasticsearch: if arg.elasticsearch:
print("Indexing to Elasticsearch @ " + str(arg.elasticsearch)) print("Indexing to Elasticsearch @ " + str(arg.elasticsearch))
...@@ -430,7 +440,9 @@ async def main(): ...@@ -430,7 +440,9 @@ async def main():
feed = [-1] feed = [-1]
init = -1 init = -1
num = 0 num = 0
while True: while _since < _until:
arg.since = str(_until - datetime.timedelta(days=1))
arg.until = str(_until)
''' '''
If our response from getFeed() has an exception, If our response from getFeed() has an exception,
it signifies there are no position IDs to continue it signifies there are no position IDs to continue
...@@ -440,7 +452,9 @@ async def main(): ...@@ -440,7 +452,9 @@ async def main():
feed, init, count = await getTweets(init) feed, init, count = await getTweets(init)
num += count num += count
else: else:
break print("update\n")
_until = _until - datetime.timedelta(days=1)
feed = [-1]
# Control when we want to stop scraping. # Control when we want to stop scraping.
if arg.limit is not None and num >= int(arg.limit): if arg.limit is not None and num >= int(arg.limit):
break break
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment