Commit 3b273006, authored by Francesco Poldi, committed by GitHub

Merge pull request #172 from wardnath/reset_pandas

add function to clean accumulated pandas storage data
#169 
parents e8e1f695 cccc412b
......@@ -122,6 +122,7 @@ def initialize(args):
c.Videos = args.videos
c.Media = args.media
c.Replies = args.replies
c.Pandas_clean = args.pandas_clean
return c
def options():
......@@ -184,6 +185,7 @@ def options():
ap.add_argument("--images", help="Display only Tweets with images.", action="store_true")
ap.add_argument("--media", help="Display Tweets with only images or videos.", action="store_true")
ap.add_argument("--replies", help="Display replies to a subject.", action="store_true")
ap.add_argument("-pc","--pandas-clean", help="Automatically clean Pandas dataframe at every scrape.")
args = ap.parse_args()
return args
......@@ -210,6 +212,9 @@ def main():
if not args.essid:
args.essid = ""
if args.pandas_clean:
twint.storage.panda.clean()
c = initialize(args)
if args.favorites:
......
......@@ -47,3 +47,4 @@ class Config:
Videos = False
Media = False
Replies = False
Pandas_clean = True
from . import datelock, feed, get, output, verbose
from . import datelock, feed, get, output, verbose, storage
from asyncio import get_event_loop
from datetime import timedelta
from .storage import db
......@@ -16,6 +16,9 @@ class Twint:
self.d = datelock.Set(self.config.Until, self.config.Since)
verbose.Elastic(config.Elasticsearch)
if self.config.Pandas_clean:
storage.panda.clean()
if not self.config.Timedelta:
if (self.d._until - self.d._since).days > 30:
self.config.Timedelta = 30
......@@ -78,7 +81,7 @@ class Twint:
async def main(self):
if self.config.User_id is not None:
self.config.Username = await get.Username(self.config.User_id)
if self.config.TwitterSearch and self.config.Since and self.config.Until:
_days = timedelta(days=int(self.config.Timedelta))
while self.d._since < self.d._until:
......@@ -106,10 +109,10 @@ class Twint:
await self.tweets()
else:
break
if get.Limit(self.config.Limit, self.count):
break
if self.config.Count:
verbose.Count(self.count, self.config.Username)
......
......@@ -6,7 +6,6 @@ import warnings
_blocks = []
def update(Tweet, session):
day = weekday(strftime("%A", localtime(Tweet.datetime)))
dt = f"{Tweet.datestamp} {Tweet.timestamp}"
_data = {
......@@ -18,8 +17,6 @@ def update(Tweet, session):
"hashtags": Tweet.hashtags,
"user_id": Tweet.user_id,
"username": Tweet.username,
"day": day,
"hour": hour(Tweet.datetime),
"link": Tweet.link,
"retweet": Tweet.retweet,
"user_rt": Tweet.user_rt,
......@@ -32,6 +29,9 @@ def get():
df = pd.DataFrame(_blocks)
return df
def clean():
    """Empty the module-level ``_blocks`` list in place.

    The list object itself is kept (only its contents are removed), so
    no ``global`` rebinding is needed.
    """
    del _blocks[:]
def save(_filename, _dataframe, **options):
if options.get("dataname"):
_dataname = options.get("dataname")
......@@ -53,7 +53,9 @@ def save(_filename, _dataframe, **options):
def read(_filename, **options):
if not options.get("dataname"):
_dataname = "Twint"
_dataname = "twint"
else:
_dataname = options.get("dataname")
if not options.get("type"):
_store = pd.HDFStore(_filename)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment