Commit 7287147b authored by Cody Zacharias, committed by GitHub

Fix tabs

parent aaa37fb8
...@@ -13,24 +13,48 @@ import sys ...@@ -13,24 +13,48 @@ import sys
import time import time
def write(entry, f):
    """Append ``entry`` and a trailing newline to the text file at path ``f``.

    The file is opened in append mode with UTF-8 encoding and is closed
    before returning, so no file handle is left for the garbage collector.

    Args:
        entry: Object to write; it is rendered the same way ``print`` does.
        f: Path of the output file. It is created if it does not exist.
    """
    with open(f, "a", encoding="utf-8") as out_file:
        print(entry, file=out_file)
def writeCSV(Tweet, file):
    """Append one tweet as a CSV row to ``file``, writing a header when needed.

    The header row is emitted whenever the target file is empty at the time
    it is opened (newly created or pre-existing with zero length), so the
    output always starts with the column names.

    Args:
        Tweet: Object exposing the attributes read below (``id``,
            ``datestamp``, ``timestamp``, ``timezone``, ``user_id``,
            ``username``, ``tweet``, ``replies``, ``retweets``, ``likes``,
            ``location``, ``hashtags``, ``link``).
        file: Path of the CSV file to append to.
    """
    fieldnames = [
        "id",
        "date",
        "time",
        "timezone",
        "user_id",
        "username",
        "tweet",
        "replies",
        "retweets",
        "likes",
        "location",
        "hashtags",
        "link",
    ]
    row = {
        "id": Tweet.id,
        "date": Tweet.datestamp,
        "time": Tweet.timestamp,
        "timezone": Tweet.timezone,
        "user_id": Tweet.user_id,
        "username": Tweet.username,
        "tweet": Tweet.tweet,
        "replies": Tweet.replies,
        "retweets": Tweet.retweets,
        "likes": Tweet.likes,
        "location": Tweet.location,
        "hashtags": Tweet.hashtags,
        "link": Tweet.link,
    }
    # A single append-mode open replaces the exists-check followed by two
    # separate opens; the stream is positioned at end-of-file, so a position
    # of 0 means there is nothing in the file yet.
    with open(file, "a", newline='', encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        if csv_file.tell() == 0:
            writer.writeheader()
        writer.writerow(row)
def writeJSON(Tweet, file):
    """Append one tweet to ``file`` as a single JSON object on its own line.

    Hashtags are serialised as one comma-joined string rather than a list.

    Args:
        Tweet: Object exposing the attributes read below; ``hashtags`` must
            be an iterable of strings.
        file: Path of the output file, opened in append mode as UTF-8.
    """
    data = {
        "id": Tweet.id,
        "date": Tweet.datestamp,
        "time": Tweet.timestamp,
        "timezone": Tweet.timezone,
        "user_id": Tweet.user_id,
        "username": Tweet.username,
        "tweet": Tweet.tweet,
        "replies": Tweet.replies,
        "retweets": Tweet.retweets,
        "likes": Tweet.likes,
        "location": Tweet.location,
        "hashtags": ",".join(Tweet.hashtags),
        "link": Tweet.link,
    }
    # newline='' disables newline translation, so each record ends in "\n".
    with open(file, "a", newline='', encoding="utf-8") as json_file:
        json.dump(data, json_file)
        json_file.write("\n")
def getDate(tweet):
    """Return the tweet's date parsed from its ``tweet-timestamp`` link title.

    The ``title`` attribute is split on its last ``" - "`` and the trailing
    part is parsed with the format ``"%d %b %Y"``.

    Args:
        tweet: Element exposing ``find(name, css_class)``; presumably a
            BeautifulSoup tag for one tweet (confirm against the caller).

    Returns:
        A ``datetime.datetime`` at midnight of the parsed day.
    """
    title = tweet.find("a", "tweet-timestamp")["title"]
    datestamp = title.rpartition(" - ")[-1]
    return datetime.datetime.strptime(datestamp, "%d %b %Y")
def getTime(tweet):
    """Return the time-of-day encoded in the tweet's ``data-time`` attribute.

    The attribute is read as an integer number of seconds (presumably a Unix
    timestamp; confirm against the page markup) and reduced modulo one day.

    Args:
        tweet: Element exposing ``find(name, css_class)``.

    Returns:
        A ``datetime.datetime`` dated 1900-01-01 carrying the hour, minute
        and second. This is the same value ``strptime(..., "%H:%M:%S")``
        yields, computed without a round trip through a formatted string.
    """
    seconds = int(tweet.find("span", "_timestamp")["data-time"])
    seconds_into_day = seconds % 86400
    return datetime.datetime(1900, 1, 1) + datetime.timedelta(seconds=seconds_into_day)
def getText(tweet):
    """Return the tweet body as one line with links set apart by spaces.

    Newlines are removed, and a space is inserted before every ``"http"``
    and every ``"pic.twitter"`` occurrence so URLs do not run into the
    preceding word.

    Args:
        tweet: Element exposing ``find(name, css_class)`` whose result has a
            ``text`` attribute.

    Returns:
        The cleaned-up tweet text.
    """
    text = tweet.find("p", "tweet-text").text
    for old, new in (
        ("\n", ""),
        ("http", " http"),
        ("pic.twitter", " pic.twitter"),
    ):
        text = text.replace(old, new)
    return text
def getHashtags(text):
    """Return every hashtag in ``text`` as a list, in order of appearance.

    A hashtag is a ``#`` followed by one or more word characters. For ``str``
    patterns ``\\w`` is already Unicode-aware and case has no effect on this
    pattern, so no extra regex flags are needed.

    Args:
        text: The tweet text to scan.

    Returns:
        A list of matched strings, each including its leading ``#``.
    """
    return re.findall(r"#\w+", text)
def getStat(tweet, stat):
    """Return the raw ``data-tweet-stat-count`` value for one tweet action.

    Args:
        tweet: Element exposing ``find(name, css_class)``.
        stat: Action name interpolated into the CSS class looked up; the
            callers in this file pass "reply", "retweet" or "favorite".

    Returns:
        The attribute value exactly as stored; it is not converted to int.
    """
    css_class = "ProfileTweet-action--{} u-hiddenVisually".format(stat)
    counter = tweet.find("span", css_class).find("span")
    return counter["data-tweet-stat-count"]
def getMentions(tweet, text):
    """Prefix ``text`` with any mentioned ``@user`` it does not already contain.

    Mentions are read from the ``data-mentions`` attribute of the
    ``js-original-tweet`` div as a whitespace-separated list of names. Each
    missing mention is prepended in turn, so later names end up first.

    Args:
        tweet: Element exposing ``find(name, css_class)``.
        text: The tweet text.

    Returns:
        ``text``, with missing mentions prepended. It is returned unchanged
        when the div or the attribute is absent.
    """
    try:
        # split() with no argument drops empty fields, so an empty attribute
        # no longer produces a bogus "@ " prefix.
        mentions = tweet.find("div", "js-original-tweet")["data-mentions"].split()
    except (TypeError, KeyError, AttributeError):
        # Best effort: no such div (find returned None), no such attribute,
        # or a non-string attribute value.
        return text
    for name in mentions:
        mention = "@{}".format(name)
        if mention not in text:
            text = "{} {}".format(mention, text)
    return text
# Sort HTML # Sort HTML
def getTweet(tw, location, config):
    """Build a ``Tweet`` object from the markup of one tweet.

    Args:
        tw: Element for one tweet, exposing ``find`` / ``find_all``;
            presumably a BeautifulSoup tag (confirm against the caller).
        location: Value stored unchanged on the result as ``location``.
        config: Settings object; only ``Since`` and ``Until`` are read here.

    Returns:
        The populated ``Tweet`` instance.

    Raises:
        SystemExit: When both ``config.Since`` and ``config.Until`` are set
            and the tweet is dated exactly one day before ``config.Since``.
    """
    t = Tweet()
    t.id = tw.find("div")["data-item-id"]
    t.date = getDate(tw)
    if config.Since and config.Until:
        since = datetime.datetime.strptime(config.Since, "%Y-%m-%d").date()
        if (t.date.date() - since).days == -1:
            # mitigation here, maybe find something better
            sys.exit(0)
    t.datestamp = t.date.strftime("%Y-%m-%d")
    t.time = getTime(tw)
    t.timestamp = t.time.strftime("%H:%M:%S")
    t.user_id = tw.find("a", "account-group js-account-group js-action-profile js-user-profile-link js-nav")["data-user-id"]
    t.username = tw.find("span", "username").text.replace("@", "")
    t.timezone = strftime("%Z", gmtime())
    # Swap emoji images for their textual label before the text is extracted
    # below; otherwise they would be missing from the tweet text.
    for img in tw.find_all("img", "Emoji Emoji--forText"):
        img.replace_with("<{}>".format(img['aria-label']))
    t.tweet = getMentions(tw, getText(tw))
    t.location = location
    t.hashtags = getHashtags(t.tweet)
    t.replies = getStat(tw, "reply")
    t.retweets = getStat(tw, "retweet")
    t.likes = getStat(tw, "favorite")
    t.link = "https://twitter.com/{0.username}/status/{0.id}/".format(t)
    return t
async def getUser(user):
    """Build a ``User`` whose ``name`` comes from the element's first link.

    Args:
        user: Element exposing ``find(name)``; the first ``a`` element's
            ``name`` attribute is read.

    Returns:
        A ``User`` instance with ``name`` set.
    """
    u = User()
    u.name = user.find("a")["name"]
    return u
async def Tweets(tw, location, config, conn):
    """Parse one tweet element, store it where configured, and print it.

    Elements containing a ``StreamItemContent--withheld`` div are skipped
    entirely. Otherwise the tweet is parsed, optionally sent to the database
    and/or Elasticsearch, rendered to one output line according to
    ``config``, optionally written to ``config.Output``, and printed.

    NOTE(review): the indentation of this function was reconstructed. The
    hashtag / stats / location suffixes are assumed to belong to the default
    output format only; confirm against the upstream file.

    Args:
        tw: Element for one tweet.
        location: Passed through to ``getTweet``.
        config: Settings object; the attributes read are ``Database``,
            ``Elasticsearch``, ``Users_only``, ``Tweets_only``, ``Format``,
            ``Show_hashtags``, ``Stats``, ``Location``, ``Output``,
            ``Store_csv`` and ``Store_json``.
        conn: Database handle forwarded to ``db.tweets``.
    """
    withheld = tw.find("div", "StreamItemContent--withheld")
    if withheld is not None:
        return

    tweet = getTweet(tw, location, config)

    if config.Database:
        db.tweets(conn, tweet)
    if config.Elasticsearch:
        elasticsearch.Elastic(tweet, config)

    if config.Users_only:
        output = tweet.username
    elif config.Tweets_only:
        output = tweet.tweet
    elif config.Format:
        # Substitutions run in this fixed order, one after another, so a
        # value inserted early is itself subject to the later replacements.
        placeholders = (
            ("{id}", tweet.id),
            ("{date}", tweet.datestamp),
            ("{time}", tweet.timestamp),
            ("{user_id}", tweet.user_id),
            ("{username}", tweet.username),
            ("{timezone}", tweet.timezone),
            ("{tweet}", tweet.tweet),
            ("{location}", tweet.location),
            ("{hashtags}", str(tweet.hashtags)),
            ("{replies}", tweet.replies),
            ("{retweets}", tweet.retweets),
            ("{likes}", tweet.likes),
            ("{link}", tweet.link),
        )
        output = config.Format
        for token, value in placeholders:
            output = output.replace(token, value)
    else:
        output = "{} {} {} {} <{}> {}".format(tweet.id, tweet.datestamp, tweet.timestamp, tweet.timezone, tweet.username, tweet.tweet)
        if config.Show_hashtags:
            output += " {}".format(",".join(tweet.hashtags))
        if config.Stats:
            output += " | {} replies {} retweets {} likes".format(tweet.replies, tweet.retweets, tweet.likes)
        if config.Location:
            output += " | Location {}".format(tweet.location)

    if config.Output is not None:
        if config.Store_csv:
            writeCSV(tweet, config.Output)
        elif config.Store_json:
            writeJSON(tweet, config.Output)
        else:
            write(output, config.Output)

    # Print output
    if config.Elasticsearch:
        print(output, end=".", flush=True)
    else:
        print(output)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment