Commit 61c77daf, authored by Cody Zacharias, committed by GitHub

Update output.py

parent eb8ceda6
from .tweet import Tweet
from .profile import User
from . import db, elasticsearch from . import db, elasticsearch
from time import gmtime, strftime from .tweet import Tweet
from .user import User
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from time import localtime, strftime
import asyncio import asyncio
import csv import csv
import datetime import datetime
...@@ -10,200 +10,224 @@ import json ...@@ -10,200 +10,224 @@ import json
import os import os
import re import re
import sys import sys
import time
def write(entry, f):
    """Append ``entry`` plus a trailing newline to the text file at path ``f``.

    The file is opened in append mode as UTF-8 and is closed before the
    function returns, so repeated calls do not leak file handles.
    """
    with open(f, "a", encoding="utf-8") as handle:
        print(entry, file=handle)
def writeCSV(Tweet, config):
    """Append one tweet as a row to the CSV file at ``config.Output``.

    When ``config.Custom_csv`` is truthy it is used as the list of column
    names, and only those columns are written (an unknown column name raises
    ``KeyError``). Otherwise the full default column set is written. A header
    row is written first if the output file does not exist yet.
    """
    data = {
        "id": Tweet.id,
        "date": Tweet.datestamp,
        "time": Tweet.timestamp,
        "timezone": Tweet.timezone,
        "user_id": Tweet.user_id,
        "username": Tweet.username,
        "tweet": Tweet.tweet,
        "replies": Tweet.replies,
        # "retweets" must be present: it is listed in the default header.
        "retweets": Tweet.retweets,
        "likes": Tweet.likes,
        "location": Tweet.location,
        "hashtags": Tweet.hashtags,
        "link": Tweet.link,
        # The retweet fields may be unset on tweets that are not retweets.
        "retweet": getattr(Tweet, "is_retweet", False),
        "user_rt": getattr(Tweet, "user_rt", ""),
    }

    if config.Custom_csv:
        fieldnames = config.Custom_csv
        row = {name: data[name] for name in fieldnames}
    else:
        fieldnames = [
            "id",
            "date",
            "time",
            "timezone",
            "user_id",
            "username",
            "tweet",
            "replies",
            "retweets",
            "likes",
            "location",
            "hashtags",
            "link",
            "retweet",
            "user_rt",
        ]
        row = data

    # Decide about the header before opening: append mode creates the file.
    needs_header = not os.path.exists(config.Output)
    with open(config.Output, "a", newline='', encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)
def writeJSON(Tweet, file):
    """Append one tweet to ``file`` as a single-line JSON object.

    Each call writes one JSON document followed by a newline. Hashtags are
    stored as one comma-separated string.
    """
    data = {
        "id": Tweet.id,
        "date": Tweet.datestamp,
        "time": Tweet.timestamp,
        "timezone": Tweet.timezone,
        "user_id": Tweet.user_id,
        "username": Tweet.username,
        "tweet": Tweet.tweet,
        "replies": Tweet.replies,
        "retweets": Tweet.retweets,
        "likes": Tweet.likes,
        "location": Tweet.location,
        "hashtags": ",".join(Tweet.hashtags),
        "link": Tweet.link,
        # The retweet fields may be unset on tweets that are not retweets.
        "retweet": getattr(Tweet, "is_retweet", False),
        "user_rt": getattr(Tweet, "user_rt", ""),
    }
    with open(file, "a", newline='', encoding="utf-8") as json_file:
        # json.dump, not json_dump: the latter is an undefined name.
        json.dump(data, json_file)
        json_file.write("\n")
def getDate(tweet):
    """Return the tweet's date parsed from its timestamp link title.

    Reads the ``title`` attribute of the ``a.tweet-timestamp`` element, keeps
    the part after the last " - " and parses it with the format
    ``"%d %b %Y"`` into a ``datetime.datetime``.
    """
    title = tweet.find("a", "tweet-timestamp")["title"]
    datestamp = title.rpartition(" - ")[-1]
    return datetime.datetime.strptime(datestamp, "%d %b %Y")
def getTime(tweet):
    """Return the tweet's time of day parsed from its ``data-time`` attribute.

    The ``data-time`` value of the ``span._timestamp`` element is read as a
    number of seconds. Formatting it as a ``timedelta`` and dropping the
    leading "N days, " part leaves ``H:MM:SS``, which is parsed into a
    ``datetime.datetime`` (the date part is the ``strptime`` default).
    """
    seconds = int(tweet.find("span", "_timestamp")["data-time"])
    as_text = str(datetime.timedelta(seconds=seconds))
    # rpartition returns the whole string when there is no "N days, " prefix.
    clock = as_text.rpartition(", ")[-1]
    return datetime.datetime.strptime(clock, "%H:%M:%S")
def getText(tweet):
    """Return the tweet text flattened to one line.

    Takes the ``.text`` of the ``p.tweet-text`` element, removes newlines and
    inserts a space before every "http" and "pic.twitter" occurrence so links
    are separated from the preceding word.
    """
    text = tweet.find("p", "tweet-text").text
    for old, new in (("\n", ""), ("http", " http"), ("pic.twitter", " pic.twitter")):
        text = text.replace(old, new)
    return text
def getHashtags(text):
    """Return the list of hashtags ("#" followed by word characters) in ``text``.

    Matches are returned in order of appearance, including the leading "#".
    An empty list is returned when there are none.
    """
    return re.findall(r'(?i)\#\w+', text, flags=re.UNICODE)
def getStat(tweet, stat):
    """Return the ``data-tweet-stat-count`` value for the given ``stat``.

    ``stat`` is inserted into the class string
    ``"ProfileTweet-action--{stat} u-hiddenVisually"``. The count is read from
    the first ``span`` nested inside the ``span`` carrying that class.
    """
    css_class = "ProfileTweet-action--{} u-hiddenVisually".format(stat)
    container = tweet.find("span", css_class)
    return container.find("span")["data-tweet-stat-count"]
def getMentions(tweet, text):
    """Return ``text`` with any missing ``@mention`` prefixes added.

    Screen names are read from the space-separated ``data-mentions``
    attribute of the ``div.js-original-tweet`` element. Every name whose
    ``@name`` form does not already occur in ``text`` is prepended to it.
    If the element or attribute is missing, ``text`` is returned unchanged.
    """
    try:
        # split() without arguments drops empty entries, so an empty
        # attribute does not produce a bare "@" prefix.
        mentions = tweet.find("div", "js-original-tweet")["data-mentions"].split()
    except (AttributeError, KeyError, TypeError):
        # No such element (find returned None) or no data-mentions attribute.
        return text

    for name in mentions:
        mention = "@{}".format(name)
        if mention not in text:
            text = "{} {}".format(mention, text)
    return text
def datecheck(datestamp, config):
    """Return ``False`` if ``datestamp`` is earlier than ``config.Since``.

    The check is only performed when both ``config.Since`` and
    ``config.Until`` are truthy; otherwise the tweet is always accepted.
    Both dates are ``YYYY-MM-DD`` strings, compared as integers after the
    dashes are removed. Only the lower bound is checked here.
    """
    if config.Since and config.Until:
        tweet_day = int(datestamp.replace("-", ""))
        since_day = int(config.Since.replace("-", ""))
        if tweet_day < since_day:
            return False
    return True
def retweet(config, tweet):
    """Return ``True`` when ``tweet`` counts as a retweet for a profile scrape.

    That is the case when ``config.Profile`` is truthy and the tweet's
    lower-cased username differs from ``config.Username``. Always returns a
    ``bool`` (never ``None``).
    """
    # NOTE(review): only tweet.username is lower-cased; config.Username is
    # compared as given — confirm it is normalised by the caller.
    return bool(config.Profile and tweet.username.lower() != config.Username)
# Sort HTML
def getTweet(tw, location, config):
    """Build a ``Tweet`` object from one parsed tweet element ``tw``.

    Fills in id, timestamps, author, text, hashtags, statistics and link from
    the markup. ``location`` is stored as given. When ``retweet(config, t)``
    is true, the tweet is also flagged as a retweet of ``config.Username``.
    """
    t = Tweet()

    t.id = tw.find("div")["data-item-id"]
    t.datetime = int(tw.find("span", "_timestamp")["data-time"])
    # Convert once and reuse for both the date and the time string.
    posted = localtime(t.datetime)
    t.datestamp = strftime("%Y-%m-%d", posted)
    t.timestamp = strftime("%H:%M:%S", posted)
    t.user_id = tw.find("a", "account-group js-account-group js-action-profile js-user-profile-link js-nav")["data-user-id"]
    t.username = tw.find("span", "username").text.replace("@", "")
    # Timezone name of the local clock right now, not at the tweet's time.
    t.timezone = strftime("%Z", localtime())
    # Replace emoji images with their label so they survive text extraction.
    for img in tw.findAll("img", "Emoji Emoji--forText"):
        img.replaceWith("<{}>".format(img['aria-label']))
    t.tweet = getMentions(tw, getText(tw))
    t.location = location
    t.hashtags = getHashtags(t.tweet)
    t.replies = getStat(tw, "reply")
    t.retweets = getStat(tw, "retweet")
    t.likes = getStat(tw, "favorite")
    t.link = "https://twitter.com/{0.username}/status/{0.id}".format(t)

    # NOTE(review): is_retweet / user_rt are only assigned for retweets;
    # confirm the Tweet class supplies defaults for the other case.
    if retweet(config, t):
        t.is_retweet = True
        t.user_rt = config.Username
    return t
async def getUser(user):
    """Return a ``User`` whose ``name`` is the ``name`` attribute of the first ``a`` element in ``user``."""
    u = User()
    u.name = user.find("a")["name"]
    return u
def getOutput(Tweet, config, conn):
    """Return the console/text representation of ``Tweet``.

    The first matching option wins:

    * ``config.Users_only``  -> just the username
    * ``config.Tweets_only`` -> just the tweet text
    * ``config.Format``      -> the format string with ``{placeholder}``
      tokens substituted
    * otherwise              -> the default line, optionally extended with
      hashtags, stats and location

    ``conn`` is accepted for interface compatibility and is not used here.
    """
    if config.Users_only:
        return Tweet.username
    if config.Tweets_only:
        return Tweet.tweet

    if config.Format:
        # Substituted in this order. str.replace only accepts strings, so
        # the list and bool fields are converted explicitly. The retweet
        # fields may be unset on tweets that are not retweets.
        substitutions = (
            ("{id}", Tweet.id),
            ("{date}", Tweet.datestamp),
            ("{time}", Tweet.timestamp),
            ("{user_id}", Tweet.user_id),
            ("{username}", Tweet.username),
            ("{timezone}", Tweet.timezone),
            ("{tweet}", Tweet.tweet),
            ("{location}", Tweet.location),
            ("{hashtags}", str(Tweet.hashtags)),
            ("{replies}", Tweet.replies),
            ("{retweets}", Tweet.retweets),
            ("{likes}", Tweet.likes),
            ("{link}", Tweet.link),
            ("{is_retweet}", str(getattr(Tweet, "is_retweet", False))),
            ("{user_rt}", str(getattr(Tweet, "user_rt", ""))),
        )
        output = config.Format
        for placeholder, value in substitutions:
            output = output.replace(placeholder, value)
        return output

    output = "{} {} {} {} ".format(Tweet.id, Tweet.datestamp,
                                   Tweet.timestamp, Tweet.timezone)
    if retweet(config, Tweet):
        output += "RT "
    output += "<{}> {}".format(Tweet.username, Tweet.tweet)
    if config.Show_hashtags:
        output += " {}".format(",".join(Tweet.hashtags))
    if config.Stats:
        output += " | {} replies {} retweets {} likes".format(Tweet.replies,
                                                              Tweet.retweets, Tweet.likes)
    if config.Location:
        output += " | Location {}".format(Tweet.location)
    return output
def is_tweet(tw):
    """Return ``True`` if the first ``div`` in ``tw`` has a ``data-item-id`` attribute.

    Returns ``False`` when there is no ``div`` or the attribute is missing.
    """
    try:
        tw.find("div")["data-item-id"]
    except (AttributeError, KeyError, TypeError):
        # find() returned None, or the div has no data-item-id attribute.
        return False
    return True
async def Tweets(tw, location, config, conn):
    """Process one tweet element: store it, write it to file and print it.

    Elements containing a ``div.StreamItemContent--withheld`` block and
    elements that are not tweets (see ``is_tweet``) are skipped, as are
    tweets rejected by ``datecheck``. For accepted tweets the function:

    1. stores the tweet in the database when ``config.Database`` is set,
    2. indexes it when ``config.Elasticsearch`` is set,
    3. appends it to ``config.Output`` as CSV, JSON or plain text,
    4. prints the formatted line to stdout.
    """
    withheld = tw.find("div", "StreamItemContent--withheld")
    if withheld is not None or not is_tweet(tw):
        return

    tweet = getTweet(tw, location, config)
    if not datecheck(tweet.datestamp, config):
        return

    output = getOutput(tweet, config, conn)

    if config.Database:
        db.tweets(conn, tweet)
    if config.Elasticsearch:
        elasticsearch.Tweet(tweet, config.Elasticsearch, config.Essid)

    if config.Output is not None:
        if config.Store_csv:
            writeCSV(tweet, config)
        elif config.Store_json:
            writeJSON(tweet, config.Output)
        else:
            write(output, config.Output)

    # Print output
    if config.Elasticsearch:
        print(output, end=".", flush=True)
    else:
        print(output)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment