Commit 61c77daf authored by Cody Zacharias, committed by GitHub

Update output.py

parent eb8ceda6
from .tweet import Tweet
from .profile import User
from . import db, elasticsearch
from time import gmtime, strftime
from .tweet import Tweet
from .user import User
from bs4 import BeautifulSoup
from time import localtime, strftime
import asyncio
import csv
import datetime
......@@ -10,13 +10,11 @@ import json
import os
import re
import sys
import time
def write(entry, f):
    """Append ``entry`` followed by a newline to the UTF-8 text file ``f``.

    Args:
        entry: Object to write; it is rendered the same way ``print`` would.
        f: Path of the file to append to (created if it does not exist).
    """
    # Use a context manager so the handle is closed after every call instead
    # of being left for the garbage collector to reclaim.
    with open(f, "a", encoding="utf-8") as out_file:
        print(entry, file=out_file)
def writeCSV(Tweet, config):
data = {
"id": Tweet.id,
"date": Tweet.datestamp,
......@@ -26,11 +24,12 @@ def writeCSV(Tweet, config):
"username": Tweet.username,
"tweet": Tweet.tweet,
"replies": Tweet.replies,
"retweets": Tweet.retweets,
"likes": Tweet.likes,
"location": Tweet.location,
"hashtags": Tweet.hashtags,
"link": Tweet.link
"link": Tweet.link,
"retweet": Tweet.is_retweet,
"user_rt": Tweet.user_rt
}
if config.Custom_csv:
......@@ -52,15 +51,17 @@ def writeCSV(Tweet, config):
"likes",
"location",
"hashtags",
"link"
"link",
"retweet",
"user_rt"
]
row = data
if not (os.path.exists(config.Output)):
with open(config.Output, "w", newline='', encoding="utf-8") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
with open(config.Output, "a", newline='', encoding="utf-8") as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writerow(row)
......@@ -79,22 +80,15 @@ def writeJSON(Tweet, file):
"likes": Tweet.likes,
"location": Tweet.location,
"hashtags": ",".join(Tweet.hashtags),
"link": Tweet.link}
"link": Tweet.link,
"retweet": Tweet.is_retweet,
"user_rt": Tweet.user_rt
}
with open(file, "a", newline='', encoding="utf-8") as json_file:
json.dump(data, json_file)
json_dump(data, json_file)
json_file.write("\n")
def getDate(tweet):
    """Return the date found in the tweet's timestamp anchor as a datetime.

    Looks up the ``a`` element with class ``tweet-timestamp``, takes its
    ``title`` attribute, keeps only the text after the last " - " and parses
    that with the "%d %b %Y" format (so the time-of-day fields are zero).

    Args:
        tweet: Parsed tweet markup exposing a BeautifulSoup-style ``find``.

    Returns:
        datetime.datetime: The parsed date.
    """
    title = tweet.find("a", "tweet-timestamp")["title"]
    # NOTE(review): presumably the title looks like "3:15 PM - 7 Jun 2018";
    # only the part after the final " - " is used.
    _, _, day_text = title.rpartition(" - ")
    return datetime.datetime.strptime(day_text, "%d %b %Y")
def getTime(tweet):
    """Return the time of day encoded in the tweet's ``data-time`` attribute.

    The ``data-time`` value of the ``span._timestamp`` element is read as an
    integer number of seconds, rendered as a ``timedelta`` string, and the
    trailing "H:MM:SS" part (whatever follows an optional "N days, " prefix)
    is parsed with "%H:%M:%S".

    Args:
        tweet: Parsed tweet markup exposing a BeautifulSoup-style ``find``.

    Returns:
        datetime.datetime: Only the hour/minute/second fields are meaningful;
        the date part is ``strptime``'s default.
    """
    seconds = int(tweet.find("span", "_timestamp")["data-time"])
    rendered = str(datetime.timedelta(seconds=seconds))
    # Drop the "N days, " prefix, if present, leaving just the clock part.
    clock_text = rendered.split(", ")[-1]
    return datetime.datetime.strptime(clock_text, "%H:%M:%S")
def getText(tweet):
text = tweet.find("p", "tweet-text").text
text = text.replace("\n", "")
......@@ -120,27 +114,31 @@ def getMentions(tweet, text):
text = "{} {}".format(mention, text)
except:
pass
return text
def datecheck(datestamp, config):
    """Tell whether a tweet dated ``datestamp`` passes the ``Since`` bound.

    The check is only applied when both ``config.Since`` and ``config.Until``
    are set; otherwise every tweet is accepted. Dates are compared as
    integers after stripping the dashes from their "YYYY-MM-DD" form.

    Args:
        datestamp: Tweet date as a "YYYY-MM-DD" string.
        config: Object exposing ``Since`` and ``Until`` attributes.

    Returns:
        bool: ``False`` when the tweet is older than ``config.Since``,
        ``True`` otherwise.
    """
    if config.Since and config.Until:
        tweet_day = int(datestamp.replace("-", ""))
        since_day = int(config.Since.replace("-", ""))
        if tweet_day < since_day:
            # Report the rejection to the caller instead of terminating the
            # whole process; the caller uses the result as a filter.
            return False
    # NOTE(review): config.Until only gates the check and is never compared
    # against the tweet date here — confirm the upper bound is enforced
    # elsewhere.
    return True
def retweet(config, tweet):
    """Tell whether ``tweet`` should be treated as a retweet.

    A tweet counts as a retweet when ``config.Profile`` is set and the
    tweet's author differs from ``config.Username``. The comparison ignores
    case on both sides, so a mixed-case configured username does not flag
    the user's own tweets.

    Args:
        config: Object exposing ``Profile`` and ``Username`` attributes.
        tweet: Object exposing a ``username`` string attribute.

    Returns:
        bool: ``True`` for a retweet, ``False`` otherwise.
    """
    if not config.Profile:
        return False
    # An unset Username compares unequal to any author, as before.
    wanted = (config.Username or "").lower()
    return tweet.username.lower() != wanted
# Sort HTML
def getTweet(tw, location, config):
t = Tweet()
t.id = tw.find("div")["data-item-id"]
t.date = getDate(tw)
t.datestamp = t.date.strftime("%Y-%m-%d")
#if config.Since and config.Until:
# datecheck(t.datestamp, config)
t.time = getTime(tw)
t.timestamp = t.time.strftime("%H:%M:%S")
t.datetime = int(tw.find("span", "_timestamp")["data-time"])
t.datestamp = strftime("%Y-%m-%d", localtime(t.datetime))
t.timestamp = strftime("%H:%M:%S", localtime(t.datetime))
t.user_id = tw.find("a", "account-group js-account-group js-action-profile js-user-profile-link js-nav")["data-user-id"]
t.username = tw.find("span", "username").text.replace("@", "")
t.timezone = strftime("%Z", gmtime())
t.timezone = strftime("%Z", localtime())
for img in tw.findAll("img", "Emoji Emoji--forText"):
img.replaceWith("<{}>".format(img['aria-label']))
t.tweet = getMentions(tw, getText(tw))
......@@ -149,7 +147,12 @@ def getTweet(tw, location, config):
t.replies = getStat(tw, "reply")
t.retweets = getStat(tw, "retweet")
t.likes = getStat(tw, "favorite")
t.link = "https://twitter.com/{0.username}/status/{0.id}/".format(t)
t.link = "https://twitter.com/{0.username}/status/{0.id}".format(t)
if retweet(config, t):
t.is_retweet = True
t.user_rt = config.Username
return t
async def getUser(user):
......@@ -157,16 +160,7 @@ async def getUser(user):
u.name = user.find("a")["name"]
return u
async def Tweets(tw, location, config, conn):
copyright = tw.find("div", "StreamItemContent--withheld")
if copyright is None:
Tweet = getTweet(tw, location, config)
if config.Database:
db.tweets(conn, Tweet)
if config.Elasticsearch:
elasticsearch.Tweet(Tweet, config.Elasticsearch, config.Essid)
def getOutput(Tweet, config, conn):
if config.Users_only:
output = Tweet.username
elif config.Tweets_only:
......@@ -185,14 +179,45 @@ async def Tweets(tw, location, config, conn):
output = output.replace("{retweets}", Tweet.retweets)
output = output.replace("{likes}", Tweet.likes)
output = output.replace("{link}", Tweet.link)
output = output.replace("{is_retweet}", Tweet.is_retweet)
output = output.replace("{user_rt}", Tweet.user_rt)
else:
output = "{} {} {} {} <{}> {}".format(Tweet.id, Tweet.datestamp, Tweet.timestamp, Tweet.timezone, Tweet.username, Tweet.tweet)
output = "{} {} {} {} ".format(Tweet.id, Tweet.datestamp,
Tweet.timestamp, Tweet.timezone)
if retweet(config, Tweet):
output += "RT "
output += "<{}> {}".format(Tweet.username, Tweet.tweet)
if config.Show_hashtags:
output+= " {}".format(",".join(Tweet.hashtags))
output += " {}".format(",".join(Tweet.hashtags))
if config.Stats:
output+= " | {} replies {} retweets {} likes".format(Tweet.replies, Tweet.retweets, Tweet.likes)
output += " | {} replies {} retweets {} likes".format(Tweet.replies,
Tweet.retweets, Tweet.likes)
if config.Location:
output+= " | Location {}".format(Tweet.location)
output += " | Location {}".format(Tweet.location)
return output
def is_tweet(tw):
    """Tell whether ``tw`` carries a tweet id.

    The element counts as a tweet when its first ``div`` exists and has a
    ``data-item-id`` attribute.

    Args:
        tw: Parsed markup exposing a BeautifulSoup-style ``find``.

    Returns:
        bool: ``True`` if the id attribute can be read, ``False`` otherwise.
    """
    try:
        tw.find("div")["data-item-id"]
    except (AttributeError, KeyError, TypeError):
        # No <div> (find returned None), no such attribute, or ``tw`` is not
        # a searchable element. Anything else (e.g. KeyboardInterrupt) is
        # allowed to propagate.
        return False
    return True
async def Tweets(tw, location, config, conn):
copyright = tw.find("div", "StreamItemContent--withheld")
if copyright is None and is_tweet(tw):
Tweet = getTweet(tw, location, config)
if datecheck(Tweet.datestamp, config):
output = getOutput(Tweet, config, conn)
if config.Database:
db.tweets(conn, Tweet)
if config.Elasticsearch:
elasticsearch.Tweet(Tweet, config.Elasticsearch, config.Essid)
if config.Output != None:
if config.Store_csv:
......@@ -202,7 +227,6 @@ async def Tweets(tw, location, config, conn):
else:
write(output, config.Output)
# Print output
if config.Elasticsearch:
print(output, end=".", flush=True)
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment