Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
T
Twint
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nanahira
Twint
Commits
41462277
Commit
41462277
authored
Oct 21, 2018
by
Francesco Poldi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Revert "Merge remote-tracking branch 'origin/master'"
This reverts commit 4c27b2b82cb84e7b8d5fac206b0b8502a5faba4e.
parent
342a6955
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
140 additions
and
450 deletions
+140
-450
elasticsearch/index-tweets.json
elasticsearch/index-tweets.json
+1
-16
elasticsearch/index-user.json
elasticsearch/index-user.json
+2
-3
twint/get.py
twint/get.py
+0
-9
twint/output.py
twint/output.py
+2
-23
twint/run.py
twint/run.py
+0
-3
twint/storage/db.py
twint/storage/db.py
+75
-257
twint/storage/elasticsearch.py
twint/storage/elasticsearch.py
+4
-20
twint/tweet.py
twint/tweet.py
+45
-102
twint/user.py
twint/user.py
+11
-17
No files found.
elasticsearch/index-tweets.json
View file @
41462277
...
...
@@ -4,11 +4,8 @@ PUT twinttweets
"items"
:
{
"properties"
:
{
"id"
:
{
"type"
:
"long"
},
"conversation_id"
:
{
"type"
:
"text"
},
"created_at"
:
{
"type"
:
"text"
},
"date"
:
{
"type"
:
"date"
,
"format"
:
"yyyy-MM-dd HH:mm:ss"
},
"timezone"
:
{
"type"
:
"text"
},
"place"
:
{
"type"
:
"text"
},
"location"
:
{
"type"
:
"text"
},
"hashtags"
:
{
"type"
:
"text"
},
"tweet"
:
{
"type"
:
"text"
},
...
...
@@ -16,28 +13,16 @@ PUT twinttweets
"retweets"
:
{
"type"
:
"boolean"
},
"likes"
:
{
"type"
:
"boolean"
},
"user_id"
:
{
"type"
:
"keyword"
},
"user_id_str"
:
{
"type"
:
"text"
},
"username"
:
{
"type"
:
"keyword"
},
"name"
:
{
"type"
:
"text"
},
"profile_image_url"
:
{
"type"
:
"text"
},
"day"
:
{
"type"
:
"integer"
},
"hour"
:
{
"type"
:
"integer"
},
"link"
:
{
"type"
:
"text"
},
"gif_url"
:
{
"type"
:
"text"
},
"gif_thumb"
:
{
"type"
:
"text"
},
"video_url"
:
{
"type"
:
"text"
},
"video_thumb"
:
{
"type"
:
"text"
},
"is_reply_to"
:
{
"type"
:
"integer"
},
"has_parent_tweet"
:
{
"type"
:
"integer"
},
"retweet"
:
{
"type"
:
"text"
},
"user_rt"
:
{
"type"
:
"text"
},
"essid"
:
{
"type"
:
"keyword"
},
"nlikes"
:
{
"type"
:
"integer"
},
"nreplies"
:
{
"type"
:
"integer"
},
"nretweets"
:
{
"type"
:
"integer"
},
"is_quote_status"
:
{
"type"
:
"integer"
},
"quote_id"
:
{
"type"
:
"long"
},
"quote_id_str"
:
{
"type"
:
"text"
},
"quote_url"
:
{
"type"
:
"text"
},
"search"
:
{
"type"
:
"text"
}
}
}
...
...
elasticsearch/index-user.json
View file @
41462277
...
...
@@ -17,10 +17,9 @@ PUT twintuser
"followers"
:
{
"type"
:
"integer"
},
"likes"
:
{
"type"
:
"integer"
},
"media"
:
{
"type"
:
"integer"
},
"private"
:
{
"type"
:
"
integer
"
},
"verified"
:
{
"type"
:
"
integer
"
},
"private"
:
{
"type"
:
"
boolean
"
},
"verified"
:
{
"type"
:
"
boolean
"
},
"avatar"
:
{
"type"
:
"text"
},
"background_image"
:
{
"type"
:
"text"
},
"session"
:
{
"type"
:
"keyword"
}
}
}
...
...
twint/get.py
View file @
41462277
...
...
@@ -10,7 +10,6 @@ from aiohttp_socks import SocksConnector, SocksVer
from
.
import
url
from
.output
import
Tweets
,
Users
from
.user
import
inf
#import logging
...
...
@@ -103,14 +102,6 @@ async def Username(_id):
return
soup
.
find
(
"a"
,
"fn url alternate-context"
)[
"href"
]
.
replace
(
"/"
,
""
)
async
def
UserId
(
username
):
#loggin.info("[<] " + str(datetime.now()) + ':: get+UserId')
url
=
f
"http://twitter.com/{username}?lang=en"
r
=
await
Request
(
url
)
soup
=
BeautifulSoup
(
r
,
"html.parser"
)
return
int
(
inf
(
soup
,
"id"
))
async
def
Tweet
(
url
,
config
,
conn
):
#loggin.info("[<] " + str(datetime.now()) + ':: Tweet')
try
:
...
...
twint/output.py
View file @
41462277
from
.
import
format
,
get
from
.
import
format
from
.tweet
import
Tweet
from
.user
import
User
from
datetime
import
datetime
...
...
@@ -78,31 +78,11 @@ def _output(obj, output, config, **extra):
except
UnicodeEncodeError
:
print
(
"unicode error [x] output._output"
)
async
def
tweetUserData
(
tweet
,
config
,
conn
):
user_ids
=
set
()
usernames
=
[]
for
user
in
tweet
.
mentions
:
if
db
.
get_user_id
(
conn
,
user
[
"id"
])
==
-
1
and
user
[
"id"
]
not
in
user_ids
:
user_ids
.
add
(
user
[
"id"
])
usernames
.
append
(
user
[
"screen_name"
])
for
user
in
tweet
.
tags
:
if
db
.
get_user_id
(
conn
,
user
[
"id"
])
==
-
1
and
user
[
"id"
]
not
in
user_ids
:
user_ids
.
add
(
user
[
"id"
])
usernames
.
append
(
user
[
"screen_name"
])
for
user
in
tweet
.
replies
:
if
db
.
get_user_id
(
conn
,
user
[
"id"
])
==
-
1
and
user
[
"id"
]
not
in
user_ids
:
user_ids
.
add
(
user
[
"id"
])
usernames
.
append
(
user
[
"screen_name"
])
for
user
in
usernames
:
url
=
f
"http://twitter.com/{user}?lang=en"
await
get
.
User
(
url
,
config
,
conn
)
async
def
Tweets
(
tw
,
location
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: output+Tweets')
copyright
=
tw
.
find
(
"div"
,
"StreamItemContent--withheld"
)
if
copyright
is
None
and
is_tweet
(
tw
):
tweet
=
Tweet
(
tw
,
location
,
config
)
await
tweetUserData
(
tweet
,
config
,
conn
)
if
datecheck
(
tweet
.
datestamp
,
config
):
output
=
format
.
Tweet
(
config
,
tweet
)
...
...
@@ -128,8 +108,7 @@ async def Users(u, config, conn):
output
=
format
.
User
(
config
.
Format
,
user
)
if
config
.
Database
:
#db.user(conn, config.Username, config.Followers, user)
db
.
user
(
conn
,
config
,
user
)
db
.
user
(
conn
,
config
.
Username
,
config
.
Followers
,
user
)
if
config
.
Elasticsearch
:
_save_date
=
user
.
join_date
...
...
twint/run.py
View file @
41462277
...
...
@@ -94,9 +94,6 @@ class Twint:
if
self
.
config
.
User_id
is
not
None
:
self
.
config
.
Username
=
await
get
.
Username
(
self
.
config
.
User_id
)
if
self
.
config
.
Username
is
not
None
:
self
.
config
.
User_id
=
await
get
.
UserId
(
self
.
config
.
Username
)
if
self
.
config
.
TwitterSearch
and
self
.
config
.
Since
and
self
.
config
.
Until
:
_days
=
timedelta
(
days
=
int
(
self
.
config
.
Timedelta
))
while
self
.
d
.
_since
<
self
.
d
.
_until
:
...
...
twint/storage/db.py
View file @
41462277
...
...
@@ -18,181 +18,76 @@ def init(db):
try
:
conn
=
sqlite3
.
connect
(
db
)
cursor
=
conn
.
cursor
()
table_users
=
"""
CREATE TABLE IF NOT EXISTS
users(
id integer not null,
id_str text not null,
name text,
username text not null,
bio text,
location text,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private integer not null,
verified integer not null,
profile_image_url text not null,
background_image text,
date_update text not null,
CONSTRAINT users_pk PRIMARY KEY (id)
);
"""
cursor
.
execute
(
table_users
)
table_tweets
=
"""
CREATE TABLE IF NOT EXISTS
tweets (
id integer not null,
id_str text not null,
tweet text default '',
conversation_id text not null,
created_at integer not null,
user_id integer,
date text not null,
time text not null,
timezone text not null,
place text default '',
location text not null,
replies_count integer,
likes_count integer,
retweets_count integer,
user_id integer not null,
user_id_str text not null,
screen_name text not null,
name text default '',
profile_image_url text,
user text not null,
tweet text not null,
replies integer,
likes integer,
retweets integer,
hashtags text,
link text,
gif_url text,
gif_thumb text,
video_url text,
video_thumb text,
is_reply_to integer,
has_parent_tweet integer,
in_reply_to_screen_name text defualt '',
in_reply_to_status_id integer,
in_reply_to_status_id_str text default '',
in_reply_to_user_id integer,
in_reply_to_user_id_str text default '',
is_quote_status integer,
quote_id integer,
quote_id_str text,
quote_url text,
retweet bool,
user_rt text,
mentions text,
date_update text not null,
PRIMARY KEY (id)
);
"""
cursor
.
execute
(
table_tweets
)
table_retweets
=
"""
CREATE TABLE IF NOT EXISTS
retweets(
user_id integer not null,
tweet_id integer not null,
CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_retweets
)
table_mentions
=
"""
CREATE TABLE IF NOT EXISTS
mentions(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT mentions_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_mentions
)
table_replies
=
"""
CREATE TABLE IF NOT EXISTS
replies(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT replies_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_replies
)
table_tags
=
"""
CREATE TABLE IF NOT EXISTS
tags(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT tags_pk PRIMARY KEY(tweet_id, id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id),
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_tags
)
table_hashtags
=
"""
CREATE TABLE IF NOT EXISTS
hashtags(
tweet_id integer not null,
tag_name text not null,
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_hashtags
)
table_urls
=
"""
table_followers_names
=
"""
CREATE TABLE IF NOT EXISTS
urls(
tweet_id integer not null,
url text not null,
CONSTRAINT urls_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
followers_names (
user text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (user, follower)
);
"""
cursor
.
execute
(
table_urls
)
table_photos
=
"""
CREATE TABLE IF NOT EXISTS
photos(
tweet_id integer not null,
url text not null,
CONSTRAINT photos_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_
photo
s
)
cursor
.
execute
(
table_
followers_name
s
)
table_f
avorites
=
"""
table_f
ollowing_names
=
"""
CREATE TABLE IF NOT EXISTS
favorites(
user_id integer not null,
tweet_id integer not null,
CONSTRAINT favorites_pk PRIMARY KEY (user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY (user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
following_names (
user text not null,
date_update text not null,
follows text not null,
PRIMARY KEY (user, follows)
);
"""
cursor
.
execute
(
table_f
avorit
es
)
cursor
.
execute
(
table_f
ollowing_nam
es
)
table_followers
=
"""
CREATE TABLE IF NOT EXISTS
followers (
id integer not null,
follower_id integer not null,
CONSTRAINT followers_pk PRIMARY KEY (id, follower_id),
CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
CONSTRAINT follower_id_fk FOREIGN KEY(follower_id) REFERENCES users(id)
name text,
username text not null,
bio text,
location,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private text not null,
verified text not null,
avatar text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (id, username, follower)
);
"""
cursor
.
execute
(
table_followers
)
...
...
@@ -201,35 +96,27 @@ def init(db):
CREATE TABLE IF NOT EXISTS
following (
id integer not null,
following_id integer not null,
CONSTRAINT following_pk PRIMARY KEY (id, following_id),
CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
CONSTRAINT following_id_fk FOREIGN KEY(following_id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_following
)
table_followers_names
=
"""
CREATE TABLE IF NOT EXISTS
followers_names (
user text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (user, follower)
);
"""
cursor
.
execute
(
table_followers_names
)
table_following_names
=
"""
CREATE TABLE IF NOT EXISTS
following_names (
user text not null,
name text,
username text not null,
bio text,
location text,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private text not null,
verified text not null,
avatar text not null,
date_update text not null,
follows text not null,
PRIMARY KEY (
user
, follows)
PRIMARY KEY (
id, username
, follows)
);
"""
cursor
.
execute
(
table_following
_names
)
cursor
.
execute
(
table_following
)
return
conn
except
Exception
as
e
:
...
...
@@ -263,12 +150,11 @@ def follow(conn, Username, Followers, User):
except
sqlite3
.
IntegrityError
:
pass
def
user
(
conn
,
config
,
User
):
def
user
(
conn
,
Username
,
Followers
,
User
):
try
:
date_time
=
str
(
datetime
.
now
())
cursor
=
conn
.
cursor
()
entry
=
(
int
(
User
.
id
),
User
.
id
,
entry
=
(
User
.
id
,
User
.
name
,
User
.
username
,
User
.
bio
,
...
...
@@ -284,105 +170,37 @@ def user(conn, config, User):
User
.
is_private
,
User
.
is_verified
,
User
.
avatar
,
User
.
background_image
,
date_time
)
query
=
f
"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
date_time
,
Username
,)
table
=
uTable
(
Followers
)
query
=
f
"INSERT INTO {table} VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
cursor
.
execute
(
query
,
entry
)
if
config
.
Followers
or
config
.
Following
:
table
=
uTable
(
config
.
Followers
)
query
=
f
"INSERT INTO {table} VALUES(?,?)"
cursor
.
execute
(
query
,
(
config
.
User_id
,
int
(
User
.
id
)))
conn
.
commit
()
except
sqlite3
.
IntegrityError
:
pass
def
get_user_id
(
conn
,
id
):
cursor
=
conn
.
cursor
()
cursor
.
execute
(
'SELECT id FROM users WHERE id = ? LIMIT 1'
,
(
id
,))
resultset
=
cursor
.
fetchall
()
return
resultset
[
0
][
0
]
if
resultset
else
-
1
def
tweets
(
conn
,
Tweet
,
config
):
try
:
date_time
=
str
(
datetime
.
now
())
cursor
=
conn
.
cursor
()
entry
=
(
Tweet
.
id
,
Tweet
.
id_str
,
Tweet
.
tweet
,
Tweet
.
conversation_id
,
Tweet
.
datetime
,
Tweet
.
user_id
,
Tweet
.
datestamp
,
Tweet
.
timestamp
,
Tweet
.
timezone
,
Tweet
.
place
,
Tweet
.
location
,
Tweet
.
replies_count
,
Tweet
.
likes_count
,
Tweet
.
retweets_count
,
Tweet
.
user_id
,
Tweet
.
user_id_str
,
Tweet
.
username
,
Tweet
.
name
,
Tweet
.
profile_image_url
,
Tweet
.
tweet
,
Tweet
.
replies
,
Tweet
.
likes
,
Tweet
.
retweets
,
","
.
join
(
Tweet
.
hashtags
),
Tweet
.
link
,
Tweet
.
gif_url
,
Tweet
.
gif_thumb
,
Tweet
.
video_url
,
Tweet
.
video_thumb
,
Tweet
.
is_reply_to
,
Tweet
.
has_parent_tweet
,
Tweet
.
in_reply_to_screen_name
,
Tweet
.
in_reply_to_status_id
,
Tweet
.
in_reply_to_status_id_str
,
Tweet
.
in_reply_to_user_id
,
Tweet
.
in_reply_to_user_id_str
,
Tweet
.
is_quote_status
,
Tweet
.
quote_id
,
Tweet
.
quote_id_str
,
Tweet
.
quote_url
,
Tweet
.
retweet
,
Tweet
.
user_rt
,
","
.
join
(
Tweet
.
mentions
),
date_time
)
cursor
.
execute
(
'INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
,
entry
)
if
len
(
Tweet
.
mentions
)
>
0
:
query
=
'INSERT INTO mentions VALUES(?, ?, ?, ?)'
for
mention
in
Tweet
.
mentions
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
mention
[
"id"
],
mention
[
"id_str"
],
mention
[
"screen_name"
]))
if
len
(
Tweet
.
replies
)
>
0
:
query
=
'INSERT INTO replies VALUES(?, ?, ?, ?)'
for
reply
in
Tweet
.
replies
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
reply
[
"id"
],
reply
[
"id_str"
],
reply
[
"screen_name"
]))
if
len
(
Tweet
.
tags
)
>
0
:
query
=
'INSERT INTO tags VALUES(?, ?, ?, ?)'
for
tag
in
Tweet
.
tags
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
tag
[
"id"
],
tag
[
"id_str"
],
tag
[
"screen_name"
]))
if
len
(
Tweet
.
hashtags
)
>
0
:
query
=
'INSERT OR IGNORE INTO hashtags (tweet_id, tag_name) VALUES(?,?)'
for
tag
in
Tweet
.
hashtags
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
tag
))
if
len
(
Tweet
.
urls
)
>
0
:
query
=
'INSERT INTO urls VALUES(?, ?)'
for
url
in
Tweet
.
urls
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
url
))
if
len
(
Tweet
.
photos
)
>
0
:
query
=
'INSERT INTO photos VALUES(?, ?)'
for
photo
in
Tweet
.
photos
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
photo
))
if
config
.
Favorites
:
query
=
'INSERT INTO favorites VALUES(?,?)'
cursor
.
execute
(
query
,
(
config
.
User_id
,
Tweet
.
id
))
if
Tweet
.
retweet
==
1
:
query
=
'INSERT INTO retweets VALUES(?,?)'
cursor
.
execute
(
query
,
(
config
.
User_id
,
Tweet
.
id
))
cursor
.
execute
(
'INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
,
entry
)
conn
.
commit
()
except
sqlite3
.
IntegrityError
:
pass
twint/storage/elasticsearch.py
View file @
41462277
...
...
@@ -171,37 +171,22 @@ def Tweet(Tweet, config):
"_id"
:
Tweet
.
id
+
"_raw_"
+
config
.
Essid
,
"_source"
:
{
"id"
:
Tweet
.
id
,
"conversation_id"
:
Tweet
.
conversation_id
,
"created_at"
:
Tweet
.
created_at
,
"date"
:
dt
,
"timezone"
:
Tweet
.
timezone
,
"place"
:
Tweet
.
place
,
"location"
:
Tweet
.
location
,
"tweet"
:
Tweet
.
tweet
,
"hashtags"
:
Tweet
.
hashtags
,
"user_id"
:
Tweet
.
user_id
,
"user_id_str"
:
Tweet
.
user_id_str
,
"username"
:
Tweet
.
username
,
"name"
:
Tweet
.
name
,
"profile_image_url"
:
Tweet
.
profile_image_url
,
"day"
:
day
,
"hour"
:
hour
(
Tweet
.
datetime
),
"link"
:
Tweet
.
link
,
"gif_url"
:
Tweet
.
gif_url
,
"gif_thumb"
:
Tweet
.
gif_thumb
,
"video_url"
:
Tweet
.
video_url
,
"video_thumb"
:
Tweet
.
video_thumb
,
"is_reply_to"
:
Tweet
.
is_reply_to
,
"has_parent_tweet"
:
Tweet
.
has_parent_tweet
,
"retweet"
:
Tweet
.
retweet
,
"user_rt"
:
Tweet
.
user_rt
,
"essid"
:
config
.
Essid
,
"nlikes"
:
int
(
Tweet
.
likes_count
),
"nreplies"
:
int
(
Tweet
.
replies_count
),
"nretweets"
:
int
(
Tweet
.
retweets_count
),
"is_quote_status"
:
Tweet
.
is_quote_status
,
"quote_id"
:
Tweet
.
quote_id
,
"quote_id_str"
:
Tweet
.
quote_id_str
,
"quote_url"
:
Tweet
.
quote_url
,
"nlikes"
:
int
(
Tweet
.
likes
),
"nreplies"
:
int
(
Tweet
.
replies
),
"nretweets"
:
int
(
Tweet
.
retweets
),
"search"
:
str
(
config
.
Search
)
}
}
...
...
@@ -345,7 +330,6 @@ def UserProfile(user, config):
"private"
:
user
.
is_private
,
"verified"
:
user
.
is_verified
,
"avatar"
:
user
.
avatar
,
"background_image"
:
user
.
background_image
,
"session"
:
config
.
Essid
}
}
...
...
twint/tweet.py
View file @
41462277
from
time
import
strftime
,
localtime
import
json
import
re
#from datetime import datetime
#import logging
...
...
@@ -11,84 +11,16 @@ class tweet:
def
__init__
(
self
):
pass
def
getRawURLS
(
tw
,
link
,
config
):
player
=
tw
.
find_all
(
"div"
,
"PlayableMedia-player"
)
gif_url
,
gif_thumb
,
video_url
,
video_thumb
=
""
,
""
,
""
,
""
for
node
in
player
:
styles
=
node
.
attrs
[
'style'
]
.
split
()
for
style
in
styles
:
if
style
.
startswith
(
'background'
):
tmp
=
"background-image:url('"
style
=
style
.
replace
(
tmp
,
""
)
if
"tweet_video_thumb"
in
style
:
gif_url
=
style
.
replace
(
"')"
,
''
)
gif_url
=
gif_url
.
replace
(
'.jpg'
,
'.mp4'
)
gif_url
=
gif_url
.
replace
(
'https://pbs'
,
'https://video'
)
gif_url
=
gif_url
.
replace
(
"_thumb"
,
""
)
gif_thumb
=
style
.
replace
(
"')"
,
""
)
else
:
video_url
,
video_thumb
=
"video"
,
"video_thumb"
return
gif_url
,
gif_thumb
,
video_url
,
video_thumb
def
getMentions
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions')
"""Extract ment from tweet
"""
mentions
=
[{
"id"
:
int
(
mention
[
"data-mentioned-user-id"
]),
"id_str"
:
mention
[
"data-mentioned-user-id"
],
"screen_name"
:
mention
.
get
(
'href'
)
.
split
(
"/"
)[
-
1
]}
for
mention
in
tw
.
find_all
(
'a'
,{
'class'
:
'twitter-atreply'
})]
return
mentions
def
getReplies
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getReplies')
"""Extract replies from tweet
"""
replyToUsersJSON
=
json
.
loads
(
tw
.
find
(
"div"
)[
"data-reply-to-users-json"
])
replies
=
[{
"id"
:
int
(
reply
[
"id_str"
]),
"id_str"
:
reply
[
"id_str"
],
"screen_name"
:
reply
[
"screen_name"
]}
for
reply
in
replyToUsersJSON
]
return
replies
def
getTags
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTags')
"""Extract tags from tweet
"""
tags
=
[]
try
:
tag_links
=
tw
.
find
(
"div"
,
"media-tagging-block"
)
.
find_all
(
"a"
,
"js-user-profile-link"
)
for
tag
in
tag_links
:
if
tag
.
has_attr
(
"data-user-id"
):
tmpData
=
{
"id"
:
int
(
tag
[
"data-user-id"
]),
"id_str"
:
tag
[
"data-user-id"
],
"screen_name"
:
tag
.
get
(
'href'
)
.
split
(
"/"
)[
-
1
]
}
tags
.
append
(
tmpData
)
except
:
tags
=
[]
return
tags
def
getQuoteInfo
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getQuoteInfo')
"""Extract quote from tweet
"""
base_twitter
=
"https://twitter.com"
quote_status
=
0
quote_id
=
0
quote_id_str
=
""
quote_url
=
""
try
:
quote
=
tw
.
find
(
"div"
,
"QuoteTweet-innerContainer"
)
quote_status
=
1
quote_id
=
int
(
quote
[
"data-item-id"
])
quote_id_str
=
quote
[
"data-item-id"
]
quote_url
=
base_twitter
+
quote
.
get
(
"href"
)
mentions
=
tw
.
find
(
"div"
,
"js-original-tweet"
)[
"data-mentions"
]
.
split
(
" "
)
except
:
quote_status
=
0
mentions
=
""
return
quote_status
,
quote_id
,
quote_id_str
,
quote_url
return
mentions
def
getText
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getText')
...
...
@@ -101,6 +33,25 @@ def getText(tw):
return
text
def
getTweet
(
tw
,
mentions
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTweet')
try
:
text
=
getText
(
tw
)
for
i
in
range
(
len
(
mentions
)):
mention
=
f
"@{mentions[i]}"
if
mention
not
in
text
:
text
=
f
"{mention} {text}"
except
:
text
=
getText
(
tw
)
return
text
def
getHashtags
(
text
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getHashtags')
"""Get hashtags of tweet
"""
return
re
.
findall
(
r'(?i)\#\w+'
,
text
,
flags
=
re
.
UNICODE
)
def
getStat
(
tw
,
_type
):
"""Get stats about Tweet
"""
...
...
@@ -110,50 +61,42 @@ def getStat(tw, _type):
def
getRetweet
(
profile
,
username
,
user
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getRetweet')
if
profile
and
username
.
lower
()
!=
user
.
lower
():
return
1
if
profile
and
username
.
lower
()
!=
user
:
return
True
def
getUser_rt
(
profile
,
username
,
user
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getUser_rt')
"""Get username that retweeted
"""
if
getRetweet
(
profile
,
username
,
user
):
user_rt
=
user
else
:
user_rt
=
"None"
return
user_rt
def
Tweet
(
tw
,
location
,
config
):
"""Create Tweet object
"""
##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet')
t
=
tweet
()
t
.
id
=
int
(
tw
.
find
(
"div"
)[
"data-item-id"
])
t
.
id_str
=
tw
.
find
(
"div"
)[
"data-item-id"
]
t
.
conversation_id
=
tw
.
find
(
"div"
)[
"data-conversation-id"
]
t
.
id
=
tw
.
find
(
"div"
)[
"data-item-id"
]
t
.
datetime
=
int
(
tw
.
find
(
"span"
,
"_timestamp"
)[
"data-time"
])
t
.
datestamp
=
strftime
(
"
%
Y-
%
m-
%
d"
,
localtime
(
t
.
datetime
))
t
.
timestamp
=
strftime
(
"
%
H:
%
M:
%
S"
,
localtime
(
t
.
datetime
))
t
.
user_id
=
int
(
tw
.
find
(
"div"
)[
"data-user-id"
])
t
.
user_id_str
=
tw
.
find
(
"div"
)[
"data-user-id"
]
t
.
username
=
tw
.
find
(
"div"
)[
"data-screen-name"
]
t
.
name
=
tw
.
find
(
"div"
)[
"data-name"
]
t
.
profile_image_url
=
tw
.
find
(
"img"
,
"js-action-profile-avatar"
)
.
get
(
'src'
)
.
replace
(
"_bigger"
,
""
)
t
.
place
=
tw
.
find
(
"a"
,
"js-geo-pivot-link"
)
.
text
.
strip
()
if
tw
.
find
(
"a"
,
"js-geo-pivot-link"
)
else
None
t
.
user_id
=
tw
.
find
(
"a"
,
"account-group js-account-group js-action-profile js-user-profile-link js-nav"
)[
"data-user-id"
]
t
.
username
=
tw
.
find
(
"span"
,
"username"
)
.
text
.
replace
(
"@"
,
""
)
t
.
timezone
=
strftime
(
"
%
Z"
,
localtime
())
for
img
in
tw
.
findAll
(
"img"
,
"Emoji Emoji--forText"
):
img
.
replaceWith
(
img
[
"alt"
])
t
.
mentions
=
getMentions
(
tw
)
t
.
tags
=
getTags
(
tw
)
t
.
replies
=
getReplies
(
tw
)
t
.
urls
=
[
link
.
attrs
[
"data-expanded-url"
]
for
link
in
tw
.
find_all
(
'a'
,{
'class'
:
'twitter-timeline-link'
})
if
link
.
has_attr
(
"data-expanded-url"
)]
t
.
photos
=
[
photo_node
.
attrs
[
'data-image-url'
]
for
photo_node
in
tw
.
find_all
(
"div"
,
"AdaptiveMedia-photoContainer"
)]
t
.
tweet
=
getText
(
tw
)
t
.
tweet
=
getTweet
(
tw
,
t
.
mentions
)
t
.
location
=
location
t
.
hashtags
=
[
hashtag
.
text
for
hashtag
in
tw
.
find_all
(
"a"
,
"twitter-hashtag"
)]
t
.
replies
_count
=
getStat
(
tw
,
"reply"
)
t
.
retweets
_count
=
getStat
(
tw
,
"retweet"
)
t
.
likes
_count
=
getStat
(
tw
,
"favorite"
)
t
.
hashtags
=
getHashtags
(
t
.
tweet
)
t
.
replies
=
getStat
(
tw
,
"reply"
)
t
.
retweets
=
getStat
(
tw
,
"retweet"
)
t
.
likes
=
getStat
(
tw
,
"favorite"
)
t
.
link
=
f
"https://twitter.com/{t.username}/status/{t.id}"
t
.
retweet
=
getRetweet
(
config
.
Profile
,
t
.
username
,
config
.
Username
)
t
.
gif_url
,
t
.
gif_thumb
,
t
.
video_url
,
t
.
video_thumb
=
getRawURLS
(
tw
,
t
.
link
,
config
)
t
.
is_quote_status
,
t
.
quote_id
,
t
.
quote_id_str
,
t
.
quote_url
=
getQuoteInfo
(
tw
)
t
.
is_reply_to
=
int
(
bool
(
tw
.
find
(
"div"
)[
"data-is-reply-to"
]))
if
tw
.
find
(
"div"
)
.
has_attr
(
"data-is-reply-to"
)
else
0
t
.
has_parent_tweet
=
int
(
bool
(
tw
.
find
(
"div"
)[
"data-has-parent-tweet"
]))
if
tw
.
find
(
"div"
)
.
has_attr
(
"data-has-parent-tweet"
)
else
0
t
.
in_reply_to_screen_name
=
""
t
.
in_reply_to_status_id
=
0
t
.
in_reply_to_status_id_str
=
""
t
.
in_reply_to_user_id
=
0
t
.
in_reply_to_user_id_str
=
""
t
.
user_rt
=
getUser_rt
(
config
.
Profile
,
t
.
username
,
config
.
Username
)
return
t
twint/user.py
View file @
41462277
...
...
@@ -20,10 +20,6 @@ def inf(ur, _type):
ret
=
group
[
"data-screen-name"
]
elif
_type
==
"private"
:
ret
=
group
[
"data-protected"
]
if
ret
==
'true'
:
ret
=
1
else
:
ret
=
0
return
ret
...
...
@@ -32,18 +28,18 @@ def card(ur, _type):
try
:
ret
=
ur
.
find
(
"p"
,
"ProfileHeaderCard-bio u-dir"
)
.
text
.
replace
(
"
\n
"
,
" "
)
except
:
ret
=
None
ret
=
"None"
elif
_type
==
"location"
:
try
:
ret
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-locationText u-dir"
)
.
text
ret
=
ret
[
15
:]
.
replace
(
"
\n
"
,
" "
)[:
-
10
]
except
:
ret
=
None
ret
=
"None"
elif
_type
==
"url"
:
try
:
ret
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-urlText u-dir"
)
.
find
(
"a"
)[
"title"
]
except
:
ret
=
None
ret
=
"None"
return
ret
...
...
@@ -58,13 +54,11 @@ def convertToInt(x):
"b"
:
1000000000
,
}
try
:
if
','
in
x
:
x
=
x
.
replace
(
','
,
''
)
y
=
int
(
x
)
return
y
except
:
pass
try
:
y
=
float
(
str
(
x
)[:
-
1
])
y
=
y
*
multDict
[
str
(
x
)[
-
1
:]
.
lower
()]
...
...
@@ -85,10 +79,11 @@ def stat(ur, _type):
def
media
(
ur
):
try
:
media_count
=
ur
.
find
(
"a"
,
"PhotoRail-headingWithCount js-nav"
)
.
text
.
strip
()
.
split
(
" "
)[
0
]
media_count
=
convertToInt
(
media_count
)
media_count
=
ur
.
find
(
"a"
,
"PhotoRail-headingWithCount js-nav"
)
.
text
media_count
=
media_count
.
replace
(
"
\n
"
,
""
)[
32
:]
.
split
(
" "
)[
0
]
media_count
=
convertToInt
(
media_count
)
except
:
media_count
=
0
media_count
=
"0"
return
media_count
...
...
@@ -96,11 +91,11 @@ def verified(ur):
try
:
is_verified
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-badges"
)
.
text
if
"Verified account"
in
is_verified
:
is_verified
=
1
is_verified
=
"true"
else
:
is_verified
=
0
is_verified
=
"false"
except
:
is_verified
=
0
is_verified
=
"false"
return
is_verified
...
...
@@ -124,5 +119,4 @@ def User(ur):
u
.
is_private
=
inf
(
ur
,
"private"
)
u
.
is_verified
=
verified
(
ur
)
u
.
avatar
=
ur
.
find
(
"img"
,
"ProfileAvatar-image"
)[
"src"
]
u
.
background_image
=
ur
.
find
(
'div'
,{
'class'
:
'ProfileCanopy-headerBg'
})
.
find
(
'img'
)
.
get
(
'src'
)
return
u
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment