Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
T
Twint
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nanahira
Twint
Commits
2cc218cd
Commit
2cc218cd
authored
Oct 25, 2018
by
andytnt
Committed by
Francesco Poldi
Oct 25, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix missing commits (#258)
parent
685078ff
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
577 additions
and
268 deletions
+577
-268
elasticsearch/index-user.json
elasticsearch/index-user.json
+4
-3
twint/get.py
twint/get.py
+9
-0
twint/output.py
twint/output.py
+23
-2
twint/run.py
twint/run.py
+3
-0
twint/storage/db.py
twint/storage/db.py
+258
-76
twint/storage/elasticsearch.py
twint/storage/elasticsearch.py
+160
-129
twint/tweet.py
twint/tweet.py
+102
-46
twint/user.py
twint/user.py
+18
-12
No files found.
elasticsearch/index-user.json
View file @
2cc218cd
...
...
@@ -17,9 +17,10 @@ PUT twintuser
"followers"
:
{
"type"
:
"integer"
},
"likes"
:
{
"type"
:
"integer"
},
"media"
:
{
"type"
:
"integer"
},
"private"
:
{
"type"
:
"
boolean
"
},
"verified"
:
{
"type"
:
"
boolean
"
},
"private"
:
{
"type"
:
"
integer
"
},
"verified"
:
{
"type"
:
"
integer
"
},
"avatar"
:
{
"type"
:
"text"
},
"background_image"
:
{
"type"
:
"text"
},
"session"
:
{
"type"
:
"keyword"
}
}
}
...
...
@@ -28,4 +29,4 @@ PUT twintuser
"settings"
:
{
"number_of_shards"
:
1
}
}
}
\ No newline at end of file
twint/get.py
View file @
2cc218cd
...
...
@@ -10,6 +10,7 @@ from aiohttp_socks import SocksConnector, SocksVer
from
.
import
url
from
.output
import
Tweets
,
Users
from
.user
import
inf
#import logging
...
...
@@ -102,6 +103,14 @@ async def Username(_id):
return
soup
.
find
(
"a"
,
"fn url alternate-context"
)[
"href"
]
.
replace
(
"/"
,
""
)
async def UserId(username):
    """Resolve a Twitter screen name to its numeric user id.

    Requests the user's English profile page through ``Request``, parses the
    response with BeautifulSoup's ``html.parser`` and reads the ``"id"`` field
    from the parsed page with ``inf`` (imported from ``.user``).

    Args:
        username: Screen name interpolated into the profile URL.

    Returns:
        ``inf(soup, "id")`` converted with ``int``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: get+UserId')
    # Deliberately not called ``url``: this module imports a ``url`` module
    # (``from . import url``) and a local of the same name would shadow it.
    profile_url = f"http://twitter.com/{username}?lang=en"
    response = await Request(profile_url)
    soup = BeautifulSoup(response, "html.parser")
    return int(inf(soup, "id"))
async
def
Tweet
(
url
,
config
,
conn
):
#loggin.info("[<] " + str(datetime.now()) + ':: Tweet')
try
:
...
...
twint/output.py
View file @
2cc218cd
from
.
import
format
from
.
import
format
,
get
from
.tweet
import
Tweet
from
.user
import
User
from
datetime
import
datetime
...
...
@@ -78,13 +78,34 @@ def _output(obj, output, config, **extra):
except
UnicodeEncodeError
:
print
(
"unicode error [x] output._output"
)
async def tweetUserData(tweet, config, conn):
    """Fetch the profiles of users referenced by a tweet that are not stored yet.

    Walks the tweet's mentions, tags and replies (each an iterable of dicts
    with ``"id"`` and ``"screen_name"`` keys), keeps every user for which
    ``db.get_user_id`` returns ``-1`` and then awaits ``get.User`` once per
    collected screen name, in first-seen order.

    Args:
        tweet: Object exposing ``mentions``, ``tags`` and ``replies``.
        config: Passed through unchanged to ``get.User``.
        conn: Database connection, passed to ``db.get_user_id`` and
            ``get.User``.
    """
    queued_ids = set()
    usernames = []
    for group in (tweet.mentions, tweet.tags, tweet.replies):
        for user in group:
            user_id = user["id"]
            # Cheap in-memory check first so a user referenced several times
            # in the same tweet costs a single database lookup.
            if user_id in queued_ids:
                continue
            if db.get_user_id(conn, user_id) == -1:
                queued_ids.add(user_id)
                usernames.append(user["screen_name"])
    for screen_name in usernames:
        url = f"http://twitter.com/{screen_name}?lang=en"
        await get.User(url, config, conn)
async
def
Tweets
(
tw
,
location
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: output+Tweets')
copyright
=
tw
.
find
(
"div"
,
"StreamItemContent--withheld"
)
if
copyright
is
None
and
is_tweet
(
tw
):
tweet
=
Tweet
(
tw
,
location
,
config
)
if
config
.
Database
is
not
None
and
config
.
User_info
:
await
tweetUserData
(
tweet
,
config
,
conn
)
if
datecheck
(
tweet
.
datestamp
,
config
):
output
=
format
.
Tweet
(
config
,
tweet
)
...
...
@@ -110,7 +131,7 @@ async def Users(u, config, conn):
output
=
format
.
User
(
config
.
Format
,
user
)
if
config
.
Database
:
db
.
user
(
conn
,
config
.
Username
,
config
.
Followers
,
user
)
db
.
user
(
conn
,
config
,
user
)
if
config
.
Elasticsearch
:
_save_date
=
user
.
join_date
...
...
twint/run.py
View file @
2cc218cd
...
...
@@ -101,6 +101,9 @@ class Twint:
if
self
.
config
.
User_id
is
not
None
:
self
.
config
.
Username
=
await
get
.
Username
(
self
.
config
.
User_id
)
if
self
.
config
.
Username
is
not
None
:
self
.
config
.
User_id
=
await
get
.
UserId
(
self
.
config
.
Username
)
if
self
.
config
.
TwitterSearch
and
self
.
config
.
Since
and
self
.
config
.
Until
:
_days
=
timedelta
(
days
=
int
(
self
.
config
.
Timedelta
))
while
self
.
d
.
_since
<
self
.
d
.
_until
:
...
...
twint/storage/db.py
View file @
2cc218cd
...
...
@@ -18,76 +18,181 @@ def init(db):
try
:
conn
=
sqlite3
.
connect
(
db
)
cursor
=
conn
.
cursor
()
table_users
=
"""
CREATE TABLE IF NOT EXISTS
users(
id integer not null,
id_str text not null,
name text,
username text not null,
bio text,
location text,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private integer not null,
verified integer not null,
profile_image_url text not null,
background_image text,
date_update text not null,
CONSTRAINT users_pk PRIMARY KEY (id)
);
"""
cursor
.
execute
(
table_users
)
table_tweets
=
"""
CREATE TABLE IF NOT EXISTS
tweets (
id integer not null,
user_id integer,
id_str text not null,
tweet text default '',
conversation_id text not null,
created_at integer not null,
date text not null,
time text not null,
timezone text not null,
place text default '',
location text not null,
user text not null,
tweet text not null,
replies integer,
likes integer,
retweets integer,
hashtags text,
replies_count integer,
likes_count integer,
retweets_count integer,
user_id integer not null,
user_id_str text not null,
screen_name text not null,
name text default '',
profile_image_url text,
link text,
retweet bool,
user_rt text,
mentions text,
gif_url text,
gif_thumb text,
video_url text,
video_thumb text,
is_reply_to integer,
has_parent_tweet integer,
in_reply_to_screen_name text defualt '',
in_reply_to_status_id integer,
in_reply_to_status_id_str text default '',
in_reply_to_user_id integer,
in_reply_to_user_id_str text default '',
is_quote_status integer,
quote_id integer,
quote_id_str text,
quote_url text,
date_update text not null,
PRIMARY KEY (id)
);
"""
cursor
.
execute
(
table_tweets
)
table_
followers_name
s
=
"""
table_
retweet
s
=
"""
CREATE TABLE IF NOT EXISTS
followers_names (
user text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (user, follower)
retweets(
user_id integer not null,
tweet_id integer not null,
CONSTRAINT retweets_pk PRIMARY KEY(user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_retweets
)
table_mentions
=
"""
CREATE TABLE IF NOT EXISTS
mentions(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT mentions_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_
followers_name
s
)
cursor
.
execute
(
table_
mention
s
)
table_
following_nam
es
=
"""
table_
repli
es
=
"""
CREATE TABLE IF NOT EXISTS
following_names (
user text not null,
date_update text not null,
follows text not null,
PRIMARY KEY (user, follows)
replies(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT replies_pk PRIMARY KEY(tweet_id,id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_following_names
)
cursor
.
execute
(
table_replies
)
table_tags
=
"""
CREATE TABLE IF NOT EXISTS
tags(
tweet_id integer not null,
id integer not null,
id_str text not null,
screen_name text not null,
CONSTRAINT tags_pk PRIMARY KEY(tweet_id, id),
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id),
CONSTRAINT user_id_fk FOREIGN KEY(id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_tags
)
table_hashtags
=
"""
CREATE TABLE IF NOT EXISTS
hashtags(
tweet_id integer not null,
tag_name text not null,
CONSTRAINT tweet_id_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_hashtags
)
table_urls
=
"""
CREATE TABLE IF NOT EXISTS
urls(
tweet_id integer not null,
url text not null,
CONSTRAINT urls_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_urls
)
table_photos
=
"""
CREATE TABLE IF NOT EXISTS
photos(
tweet_id integer not null,
url text not null,
CONSTRAINT photos_fk FOREIGN KEY(tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_photos
)
table_favorites
=
"""
CREATE TABLE IF NOT EXISTS
favorites(
user_id integer not null,
tweet_id integer not null,
CONSTRAINT favorites_pk PRIMARY KEY (user_id, tweet_id),
CONSTRAINT user_id_fk FOREIGN KEY (user_id) REFERENCES users(id),
CONSTRAINT tweet_id_fk FOREIGN KEY (tweet_id) REFERENCES tweets(id)
);
"""
cursor
.
execute
(
table_favorites
)
table_followers
=
"""
CREATE TABLE IF NOT EXISTS
followers (
id integer not null,
name text,
username text not null,
bio text,
location,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private text not null,
verified text not null,
avatar text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (id, username, follower)
follower_id integer not null,
CONSTRAINT followers_pk PRIMARY KEY (id, follower_id),
CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
CONSTRAINT follower_id_fk FOREIGN KEY(follower_id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_followers
)
...
...
@@ -96,27 +201,35 @@ def init(db):
CREATE TABLE IF NOT EXISTS
following (
id integer not null,
name text,
username text not null,
bio text,
location text,
url text,
join_date text not null,
join_time text not null,
tweets integer,
following integer,
followers integer,
likes integer,
media integer,
private text not null,
verified text not null,
avatar text not null,
following_id integer not null,
CONSTRAINT following_pk PRIMARY KEY (id, following_id),
CONSTRAINT id_fk FOREIGN KEY(id) REFERENCES users(id),
CONSTRAINT following_id_fk FOREIGN KEY(following_id) REFERENCES users(id)
);
"""
cursor
.
execute
(
table_following
)
table_followers_names
=
"""
CREATE TABLE IF NOT EXISTS
followers_names (
user text not null,
date_update text not null,
follower text not null,
PRIMARY KEY (user, follower)
);
"""
cursor
.
execute
(
table_followers_names
)
table_following_names
=
"""
CREATE TABLE IF NOT EXISTS
following_names (
user text not null,
date_update text not null,
follows text not null,
PRIMARY KEY (
id, username
, follows)
PRIMARY KEY (
user
, follows)
);
"""
cursor
.
execute
(
table_following
)
cursor
.
execute
(
table_following
_names
)
return
conn
except
Exception
as
e
:
...
...
@@ -150,11 +263,12 @@ def follow(conn, Username, Followers, User):
except
sqlite3
.
IntegrityError
:
pass
def
user
(
conn
,
Username
,
Followers
,
User
):
def
user
(
conn
,
config
,
User
):
try
:
date_time
=
str
(
datetime
.
now
())
cursor
=
conn
.
cursor
()
entry
=
(
User
.
id
,
entry
=
(
int
(
User
.
id
),
User
.
id
,
User
.
name
,
User
.
username
,
User
.
bio
,
...
...
@@ -170,37 +284,105 @@ def user(conn, Username, Followers, User):
User
.
is_private
,
User
.
is_verified
,
User
.
avatar
,
date_time
,
Username
,)
table
=
uTable
(
Followers
)
query
=
f
"INSERT INTO {table} VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
User
.
background_image
,
date_time
)
query
=
f
"INSERT INTO users VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
cursor
.
execute
(
query
,
entry
)
if
config
.
Followers
or
config
.
Following
:
table
=
uTable
(
config
.
Followers
)
query
=
f
"INSERT INTO {table} VALUES(?,?)"
cursor
.
execute
(
query
,
(
config
.
User_id
,
int
(
User
.
id
)))
conn
.
commit
()
except
sqlite3
.
IntegrityError
:
pass
def get_user_id(conn, id):
    """Return ``id`` if a row with that id exists in ``users``, else ``-1``.

    Args:
        conn: Open database connection providing ``cursor()``.
        id: User id to look up. The name shadows the ``id`` builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        The ``id`` column of the matching row, or ``-1`` when no row matches.
    """
    cursor = conn.cursor()
    cursor.execute('SELECT id FROM users WHERE id = ? LIMIT 1', (id,))
    # The query yields at most one row, so fetchone() is sufficient and
    # returns None (instead of an empty list) when nothing matches.
    row = cursor.fetchone()
    return row[0] if row is not None else -1
def
tweets
(
conn
,
Tweet
,
config
):
try
:
date_time
=
str
(
datetime
.
now
())
cursor
=
conn
.
cursor
()
entry
=
(
Tweet
.
id
,
Tweet
.
user_id
,
Tweet
.
id_str
,
Tweet
.
tweet
,
Tweet
.
conversation_id
,
Tweet
.
datetime
,
Tweet
.
datestamp
,
Tweet
.
timestamp
,
Tweet
.
timezone
,
Tweet
.
place
,
Tweet
.
location
,
Tweet
.
replies_count
,
Tweet
.
likes_count
,
Tweet
.
retweets_count
,
Tweet
.
user_id
,
Tweet
.
user_id_str
,
Tweet
.
username
,
Tweet
.
tweet
,
Tweet
.
replies
,
Tweet
.
likes
,
Tweet
.
retweets
,
","
.
join
(
Tweet
.
hashtags
),
Tweet
.
name
,
Tweet
.
profile_image_url
,
Tweet
.
link
,
Tweet
.
retweet
,
Tweet
.
user_rt
,
","
.
join
(
Tweet
.
mentions
),
Tweet
.
gif_url
,
Tweet
.
gif_thumb
,
Tweet
.
video_url
,
Tweet
.
video_thumb
,
Tweet
.
is_reply_to
,
Tweet
.
has_parent_tweet
,
Tweet
.
in_reply_to_screen_name
,
Tweet
.
in_reply_to_status_id
,
Tweet
.
in_reply_to_status_id_str
,
Tweet
.
in_reply_to_user_id
,
Tweet
.
in_reply_to_user_id_str
,
Tweet
.
is_quote_status
,
Tweet
.
quote_id
,
Tweet
.
quote_id_str
,
Tweet
.
quote_url
,
date_time
)
cursor
.
execute
(
'INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
,
entry
)
cursor
.
execute
(
'INSERT INTO tweets VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
,
entry
)
if
len
(
Tweet
.
mentions
)
>
0
:
query
=
'INSERT INTO mentions VALUES(?, ?, ?, ?)'
for
mention
in
Tweet
.
mentions
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
mention
[
"id"
],
mention
[
"id_str"
],
mention
[
"screen_name"
]))
if
len
(
Tweet
.
replies
)
>
0
:
query
=
'INSERT INTO replies VALUES(?, ?, ?, ?)'
for
reply
in
Tweet
.
replies
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
reply
[
"id"
],
reply
[
"id_str"
],
reply
[
"screen_name"
]))
if
len
(
Tweet
.
tags
)
>
0
:
query
=
'INSERT INTO tags VALUES(?, ?, ?, ?)'
for
tag
in
Tweet
.
tags
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
tag
[
"id"
],
tag
[
"id_str"
],
tag
[
"screen_name"
]))
if
len
(
Tweet
.
hashtags
)
>
0
:
query
=
'INSERT OR IGNORE INTO hashtags (tweet_id, tag_name) VALUES(?,?)'
for
tag
in
Tweet
.
hashtags
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
tag
))
if
len
(
Tweet
.
urls
)
>
0
:
query
=
'INSERT INTO urls VALUES(?, ?)'
for
url
in
Tweet
.
urls
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
url
))
if
len
(
Tweet
.
photos
)
>
0
:
query
=
'INSERT INTO photos VALUES(?, ?)'
for
photo
in
Tweet
.
photos
:
cursor
.
execute
(
query
,
(
Tweet
.
id
,
photo
))
if
config
.
Favorites
:
query
=
'INSERT INTO favorites VALUES(?,?)'
cursor
.
execute
(
query
,
(
config
.
User_id
,
Tweet
.
id
))
if
Tweet
.
retweet
==
1
:
query
=
'INSERT INTO retweets VALUES(?,?)'
cursor
.
execute
(
query
,
(
config
.
User_id
,
Tweet
.
id
))
conn
.
commit
()
except
sqlite3
.
IntegrityError
:
pass
pass
\ No newline at end of file
twint/storage/elasticsearch.py
View file @
2cc218cd
...
...
@@ -4,10 +4,6 @@ from time import strftime, localtime
import
contextlib
import
sys
_index_tweet_status
=
False
_index_follow_status
=
False
_index_user_status
=
False
class
RecycleObject
(
object
):
def
write
(
self
,
junk
):
pass
def
flush
(
self
):
pass
...
...
@@ -19,116 +15,6 @@ def nostdout():
yield
sys
.
stdout
=
savestdout
def
handleIndexResponse
(
response
):
try
:
if
response
[
"status"
]
==
400
:
return
True
except
KeyError
:
pass
if
response
[
"acknowledged"
]:
print
(
"[+] Index
\"
"
+
response
[
"index"
]
+
"
\"
created!"
)
else
:
print
(
"[x] error index creation :: storage.elasticsearch.handleIndexCreation"
)
if
response
[
"shards_acknowledged"
]:
print
(
"[+] Shards acknowledged, everything is ready to be used!"
)
return
True
else
:
print
(
"[x] error with shards :: storage.elasticsearch.HandleIndexCreation"
)
return
False
def
createIndex
(
config
,
instance
,
**
scope
):
if
scope
.
get
(
"scope"
)
==
"tweet"
:
tweets_body
=
{
"mappings"
:
{
"items"
:
{
"properties"
:
{
"id"
:
{
"type"
:
"long"
},
"date"
:
{
"type"
:
"date"
,
"format"
:
"yyyy-MM-dd HH:mm:ss"
},
"timezone"
:
{
"type"
:
"text"
},
"location"
:
{
"type"
:
"text"
},
"hashtags"
:
{
"type"
:
"text"
},
"tweet"
:
{
"type"
:
"text"
},
"replies"
:
{
"type"
:
"boolean"
},
"retweets"
:
{
"type"
:
"boolean"
},
"likes"
:
{
"type"
:
"boolean"
},
"user_id"
:
{
"type"
:
"keyword"
},
"username"
:
{
"type"
:
"keyword"
},
"day"
:
{
"type"
:
"integer"
},
"hour"
:
{
"type"
:
"integer"
},
"link"
:
{
"type"
:
"text"
},
"retweet"
:
{
"type"
:
"text"
},
"user_rt"
:
{
"type"
:
"text"
},
"essid"
:
{
"type"
:
"keyword"
},
"nlikes"
:
{
"type"
:
"integer"
},
"nreplies"
:
{
"type"
:
"integer"
},
"nretweets"
:
{
"type"
:
"integer"
},
"search"
:
{
"type"
:
"text"
}
}
}
},
"settings"
:
{
"number_of_shards"
:
1
}
}
with
nostdout
():
resp
=
instance
.
indices
.
create
(
index
=
config
.
Index_tweets
,
body
=
tweets_body
,
ignore
=
400
)
return
handleIndexResponse
(
resp
)
elif
scope
.
get
(
"scope"
)
==
"follow"
:
follow_body
=
{
"mappings"
:
{
"items"
:
{
"properties"
:
{
"user"
:
{
"type"
:
"keyword"
},
"follow"
:
{
"type"
:
"keyword"
},
"essid"
:
{
"type"
:
"keyword"
}
}
}
},
"settings"
:
{
"number_of_shards"
:
1
}
}
with
nostdout
():
resp
=
instance
.
indices
.
create
(
index
=
config
.
Index_follow
,
body
=
follow_body
,
ignore
=
400
)
return
handleIndexResponse
(
resp
)
elif
scope
.
get
(
"scope"
)
==
"user"
:
user_body
=
{
"mappings"
:
{
"items"
:
{
"properties"
:
{
"id"
:
{
"type"
:
"keyword"
},
"name"
:
{
"type"
:
"keyword"
},
"username"
:
{
"type"
:
"keyword"
},
"bio"
:
{
"type"
:
"text"
},
"location"
:
{
"type"
:
"keyword"
},
"url"
:
{
"type"
:
"text"
},
"join_datetime"
:
{
"type"
:
"date"
,
"format"
:
"yyyy-MM-dd HH:mm:ss"
},
"join_date"
:
{
"type"
:
"date"
,
"format"
:
"yyyy-MM-dd"
},
"join_time"
:
{
"type"
:
"date"
,
"format"
:
"HH:mm:ss"
},
"tweets"
:
{
"type"
:
"integer"
},
"following"
:
{
"type"
:
"integer"
},
"followers"
:
{
"type"
:
"integer"
},
"likes"
:
{
"type"
:
"integer"
},
"media"
:
{
"type"
:
"integer"
},
"private"
:
{
"type"
:
"boolean"
},
"verified"
:
{
"type"
:
"boolean"
},
"avatar"
:
{
"type"
:
"text"
},
"essid"
:
{
"type"
:
"keyword"
}
}
}
},
"settings"
:
{
"number_of_shards"
:
1
}
}
with
nostdout
():
resp
=
instance
.
indices
.
create
(
index
=
config
.
Index_users
,
body
=
user_body
,
ignore
=
400
)
return
handleIndexResponse
(
resp
)
else
:
print
(
"[x] error index pre-creation :: storage.elasticsearch.createIndex"
)
return
False
def
weekday
(
day
):
weekdays
=
{
"Monday"
:
1
,
...
...
@@ -146,7 +32,6 @@ def hour(datetime):
return
strftime
(
"
%
H"
,
localtime
(
datetime
))
def
Tweet
(
Tweet
,
config
):
global
_index_tweet_status
weekdays
=
{
"Monday"
:
1
,
"Tuesday"
:
2
,
...
...
@@ -159,6 +44,9 @@ def Tweet(Tweet, config):
day
=
weekdays
[
strftime
(
"
%
A"
,
localtime
(
Tweet
.
datetime
))]
actions
=
[]
nLikes
=
1
nReplies
=
1
nRetweets
=
1
dt
=
f
"{Tweet.datestamp} {Tweet.timestamp}"
...
...
@@ -172,35 +60,182 @@ def Tweet(Tweet, config):
"created_at"
:
Tweet
.
datetime
,
"date"
:
dt
,
"timezone"
:
Tweet
.
timezone
,
"place"
:
Tweet
.
place
,
"location"
:
Tweet
.
location
,
"tweet"
:
Tweet
.
tweet
,
"hashtags"
:
Tweet
.
hashtags
,
"user_id"
:
Tweet
.
user_id
,
"user_id_str"
:
Tweet
.
user_id_str
,
"username"
:
Tweet
.
username
,
"name"
:
Tweet
.
name
,
"profile_image_url"
:
Tweet
.
profile_image_url
,
"day"
:
day
,
"hour"
:
hour
(
Tweet
.
datetime
),
"link"
:
Tweet
.
link
,
"gif_url"
:
Tweet
.
gif_url
,
"gif_thumb"
:
Tweet
.
gif_thumb
,
"video_url"
:
Tweet
.
video_url
,
"video_thumb"
:
Tweet
.
video_thumb
,
"is_reply_to"
:
Tweet
.
is_reply_to
,
"has_parent_tweet"
:
Tweet
.
has_parent_tweet
,
"retweet"
:
Tweet
.
retweet
,
"user_rt"
:
Tweet
.
user_rt
,
"essid"
:
config
.
Essid
,
"nlikes"
:
int
(
Tweet
.
likes
),
"nreplies"
:
int
(
Tweet
.
replies
),
"nretweets"
:
int
(
Tweet
.
retweets
),
"nlikes"
:
int
(
Tweet
.
likes_count
),
"nreplies"
:
int
(
Tweet
.
replies_count
),
"nretweets"
:
int
(
Tweet
.
retweets_count
),
"is_quote_status"
:
Tweet
.
is_quote_status
,
"quote_id"
:
Tweet
.
quote_id
,
"quote_id_str"
:
Tweet
.
quote_id_str
,
"quote_url"
:
Tweet
.
quote_url
,
"search"
:
str
(
config
.
Search
)
}
}
actions
.
append
(
j_data
)
if
config
.
ES_count
[
"likes"
]:
for
l
in
range
(
int
(
Tweet
.
likes
)):
j_data
=
{
"_index"
:
config
.
Index_tweets
,
"_type"
:
config
.
Index_type
,
"_id"
:
str
(
Tweet
.
id
)
+
"_like_"
+
str
(
nLikes
)
+
config
.
Essid
,
"_source"
:
{
"id"
:
str
(
Tweet
.
id
),
"conversation_id"
:
Tweet
.
conversation_id
,
"created_at"
:
Tweet
.
datetime
,
"date"
:
dt
,
"timezone"
:
Tweet
.
timezone
,
"place"
:
Tweet
.
place
,
"location"
:
Tweet
.
location
,
"tweet"
:
Tweet
.
tweet
,
"hashtags"
:
Tweet
.
hashtags
,
"user_id"
:
Tweet
.
user_id
,
"user_id_str"
:
Tweet
.
user_id_str
,
"username"
:
Tweet
.
username
,
"name"
:
Tweet
.
name
,
"profile_image_url"
:
Tweet
.
profile_image_url
,
"day"
:
day
,
"hour"
:
hour
(
Tweet
.
datetime
),
"link"
:
Tweet
.
link
,
"gif_url"
:
Tweet
.
gif_url
,
"gif_thumb"
:
Tweet
.
gif_thumb
,
"video_url"
:
Tweet
.
video_url
,
"video_thumb"
:
Tweet
.
video_thumb
,
"is_reply_to"
:
Tweet
.
is_reply_to
,
"has_parent_tweet"
:
Tweet
.
has_parent_tweet
,
"retweet"
:
Tweet
.
retweet
,
"essid"
:
config
.
Essid
,
"nlikes"
:
int
(
Tweet
.
likes_count
),
"nreplies"
:
int
(
Tweet
.
replies_count
),
"nretweets"
:
int
(
Tweet
.
retweets_count
),
"is_quote_status"
:
Tweet
.
is_quote_status
,
"quote_id"
:
Tweet
.
quote_id
,
"quote_id_str"
:
Tweet
.
quote_id_str
,
"quote_url"
:
Tweet
.
quote_url
,
"search"
:
str
(
config
.
Search
),
"likes"
:
True
}
}
actions
.
append
(
j_data
)
nLikes
+=
1
if
config
.
ES_count
[
"replies"
]:
for
rep
in
range
(
int
(
Tweet
.
replies
)):
j_data
=
{
"_index"
:
config
.
Index_tweets
,
"_type"
:
config
.
Index_type
,
"_id"
:
str
(
Tweet
.
id
)
+
"_reply_"
+
str
(
nReplies
)
+
config
.
Essid
,
"_source"
:
{
"id"
:
str
(
Tweet
.
id
),
"conversation_id"
:
Tweet
.
conversation_id
,
"created_at"
:
Tweet
.
datetime
,
"date"
:
dt
,
"timezone"
:
Tweet
.
timezone
,
"place"
:
Tweet
.
place
,
"location"
:
Tweet
.
location
,
"tweet"
:
Tweet
.
tweet
,
"hashtags"
:
Tweet
.
hashtags
,
"user_id"
:
Tweet
.
user_id
,
"user_id_str"
:
Tweet
.
user_id_str
,
"username"
:
Tweet
.
username
,
"name"
:
Tweet
.
name
,
"profile_image_url"
:
Tweet
.
profile_image_url
,
"day"
:
day
,
"hour"
:
hour
(
Tweet
.
datetime
),
"link"
:
Tweet
.
link
,
"gif_url"
:
Tweet
.
gif_url
,
"gif_thumb"
:
Tweet
.
gif_thumb
,
"video_url"
:
Tweet
.
video_url
,
"video_thumb"
:
Tweet
.
video_thumb
,
"is_reply_to"
:
Tweet
.
is_reply_to
,
"has_parent_tweet"
:
Tweet
.
has_parent_tweet
,
"retweet"
:
Tweet
.
retweet
,
"essid"
:
config
.
Essid
,
"nlikes"
:
int
(
Tweet
.
likes_count
),
"nreplies"
:
int
(
Tweet
.
replies_count
),
"nretweets"
:
int
(
Tweet
.
retweets_count
),
"is_quote_status"
:
Tweet
.
is_quote_status
,
"quote_id"
:
Tweet
.
quote_id
,
"quote_id_str"
:
Tweet
.
quote_id_str
,
"quote_url"
:
Tweet
.
quote_url
,
"search"
:
str
(
config
.
Search
),
"replies"
:
True
}
}
actions
.
append
(
j_data
)
nReplies
+=
1
if
config
.
ES_count
[
"retweets"
]:
for
ret
in
range
(
int
(
Tweet
.
retweets
)):
j_data
=
{
"_index"
:
config
.
Index_tweets
,
"_type"
:
config
.
Index_type
,
"_id"
:
str
(
Tweet
.
id
)
+
"_retweet_"
+
str
(
nRetweets
)
+
config
.
Essid
,
"_source"
:
{
"id"
:
str
(
Tweet
.
id
),
"conversation_id"
:
Tweet
.
conversation_id
,
"created_at"
:
Tweet
.
datetime
,
"date"
:
dt
,
"timezone"
:
Tweet
.
timezone
,
"place"
:
Tweet
.
place
,
"location"
:
Tweet
.
location
,
"tweet"
:
Tweet
.
tweet
,
"hashtags"
:
Tweet
.
hashtags
,
"user_id"
:
Tweet
.
user_id
,
"user_id_str"
:
Tweet
.
user_id_str
,
"username"
:
Tweet
.
username
,
"name"
:
Tweet
.
name
,
"profile_image_url"
:
Tweet
.
profile_image_url
,
"day"
:
day
,
"hour"
:
hour
(
Tweet
.
datetime
),
"link"
:
Tweet
.
link
,
"gif_url"
:
Tweet
.
gif_url
,
"gif_thumb"
:
Tweet
.
gif_thumb
,
"video_url"
:
Tweet
.
video_url
,
"video_thumb"
:
Tweet
.
video_thumb
,
"is_reply_to"
:
Tweet
.
is_reply_to
,
"has_parent_tweet"
:
Tweet
.
has_parent_tweet
,
"retweet"
:
Tweet
.
retweet
,
"essid"
:
config
.
Essid
,
"nlikes"
:
int
(
Tweet
.
likes_count
),
"nreplies"
:
int
(
Tweet
.
replies_count
),
"nretweets"
:
int
(
Tweet
.
retweets_count
),
"is_quote_status"
:
Tweet
.
is_quote_status
,
"quote_id"
:
Tweet
.
quote_id
,
"quote_id_str"
:
Tweet
.
quote_id_str
,
"quote_url"
:
Tweet
.
quote_url
,
"search"
:
str
(
config
.
Search
),
"retweets"
:
True
}
}
actions
.
append
(
j_data
)
nRetweets
+=
1
es
=
Elasticsearch
(
config
.
Elasticsearch
)
if
not
_index_tweet_status
:
_index_tweet_status
=
createIndex
(
config
,
es
,
scope
=
"tweet"
)
with
nostdout
():
helpers
.
bulk
(
es
,
actions
,
chunk_size
=
2000
,
request_timeout
=
200
)
actions
=
[]
def
Follow
(
user
,
config
):
global
_index_follow_status
actions
=
[]
j_data
=
{
...
...
@@ -216,14 +251,11 @@ def Follow(user, config):
actions
.
append
(
j_data
)
es
=
Elasticsearch
(
config
.
Elasticsearch
)
if
not
_index_follow_status
:
_index_follow_status
=
createIndex
(
config
,
es
,
scope
=
"follow"
)
with
nostdout
():
helpers
.
bulk
(
es
,
actions
,
chunk_size
=
2000
,
request_timeout
=
200
)
actions
=
[]
def
UserProfile
(
user
,
config
):
global
_index_user_status
actions
=
[]
j_data
=
{
...
...
@@ -248,14 +280,13 @@ def UserProfile(user, config):
"private"
:
user
.
is_private
,
"verified"
:
user
.
is_verified
,
"avatar"
:
user
.
avatar
,
"background_image"
:
user
.
background_image
,
"session"
:
config
.
Essid
}
}
actions
.
append
(
j_data
)
es
=
Elasticsearch
(
config
.
Elasticsearch
)
if
not
_index_user_status
:
_index_user_status
=
createIndex
(
config
,
es
,
scope
=
"user"
)
with
nostdout
():
helpers
.
bulk
(
es
,
actions
,
chunk_size
=
2000
,
request_timeout
=
200
)
actions
=
[]
actions
=
[]
\ No newline at end of file
twint/tweet.py
View file @
2cc218cd
from
time
import
strftime
,
localtime
import
re
import
json
#from datetime import datetime
#import logging
...
...
@@ -11,16 +11,84 @@ class tweet:
def
__init__
(
self
):
pass
def getRawURLS(tw, link, config):
    """Derive GIF/video media markers from a tweet's playable-media nodes.

    Looks at every ``div`` with class ``PlayableMedia-player`` and inspects
    the whitespace-separated tokens of its ``style`` attribute. A token that
    starts with ``background`` and contains ``tweet_video_thumb`` is treated
    as a GIF thumbnail, from which the GIF URL is derived by string
    substitution. Any other ``background`` token sets the video fields to the
    placeholder strings ``"video"`` and ``"video_thumb"``.

    NOTE(review): the nesting of the ``else`` branch was reconstructed from a
    flattened listing; it is paired with the ``tweet_video_thumb`` test here.
    Confirm against the repository.

    Args:
        tw: Parsed tweet node providing ``find_all``.
        link: Unused; kept for interface compatibility.
        config: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(gif_url, gif_thumb, video_url, video_thumb)``; every element
        defaults to ``""``.
    """
    gif_url, gif_thumb, video_url, video_thumb = "", "", "", ""
    prefix = "background-image:url('"
    for node in tw.find_all("div", "PlayableMedia-player"):
        # A player node without a style attribute has nothing to inspect;
        # indexing attrs['style'] directly would raise KeyError.
        for style in node.attrs.get('style', '').split():
            if not style.startswith('background'):
                continue
            style = style.replace(prefix, "")
            if "tweet_video_thumb" in style:
                gif_thumb = style.replace("')", "")
                # Thumbnail URL -> media URL: swap extension and host, then
                # drop the "_thumb" suffix from the path.
                gif_url = gif_thumb.replace('.jpg', '.mp4')
                gif_url = gif_url.replace('https://pbs', 'https://video')
                gif_url = gif_url.replace("_thumb", "")
            else:
                video_url, video_thumb = "video", "video_thumb"
    return gif_url, gif_thumb, video_url, video_thumb
def getMentions(tw):
    """Extract mentions from tweet

    Collects every ``a`` element with class ``twitter-atreply`` and builds one
    dict per anchor from its ``data-mentioned-user-id`` attribute and the last
    path segment of its ``href``.

    Args:
        tw: Parsed tweet node providing ``find_all``.

    Returns:
        List of ``{"id": int, "id_str": str, "screen_name": str}`` dicts in
        document order. Anchors missing either attribute are skipped instead
        of aborting the parse of the whole tweet.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions')
    mentions = []
    for anchor in tw.find_all('a', {'class': 'twitter-atreply'}):
        user_id = anchor.get("data-mentioned-user-id")
        href = anchor.get('href')
        if not user_id or not href:
            continue
        mentions.append({
            "id": int(user_id),
            "id_str": user_id,
            "screen_name": href.split("/")[-1],
        })
    return mentions
def getReplies(tw):
    """Extract replies from tweet

    Reads the JSON stored in the ``data-reply-to-users-json`` attribute of the
    first ``div`` found in ``tw`` and reduces each entry to its id and screen
    name.

    Args:
        tw: Parsed tweet node providing ``find``.

    Returns:
        List of ``{"id": int, "id_str": str, "screen_name": str}`` dicts, one
        per entry of the decoded JSON array. Empty when there is no ``div`` or
        the attribute is absent or empty.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getReplies')
    container = tw.find("div")
    raw = container.get("data-reply-to-users-json") if container is not None else None
    if not raw:
        return []
    return [
        {
            "id": int(reply["id_str"]),
            "id_str": reply["id_str"],
            "screen_name": reply["screen_name"],
        }
        for reply in json.loads(raw)
    ]
def
getTags
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTags')
"""Extract tags from tweet
"""
tags
=
[]
try
:
mentions
=
tw
.
find
(
"div"
,
"js-original-tweet"
)[
"data-mentions"
]
.
split
(
" "
)
tag_links
=
tw
.
find
(
"div"
,
"media-tagging-block"
)
.
find_all
(
"a"
,
"js-user-profile-link"
)
for
tag
in
tag_links
:
if
tag
.
has_attr
(
"data-user-id"
):
tmpData
=
{
"id"
:
int
(
tag
[
"data-user-id"
]),
"id_str"
:
tag
[
"data-user-id"
],
"screen_name"
:
tag
.
get
(
'href'
)
.
split
(
"/"
)[
-
1
]
}
tags
.
append
(
tmpData
)
except
:
mentions
=
""
tags
=
[]
return
mentions
return
tags
def getQuoteInfo(tw):
    """Extract quote from tweet

    Looks for a ``div`` with class ``QuoteTweet-innerContainer`` and reads its
    ``data-item-id`` and ``href`` attributes.

    Args:
        tw: Parsed tweet node providing ``find``.

    Returns:
        Tuple ``(quote_status, quote_id, quote_id_str, quote_url)``.
        ``quote_status`` is ``1`` when all quote fields could be read and ``0``
        otherwise; on ``0`` the remaining fields are ``0``, ``""`` and ``""``
        so callers never receive a half-populated result.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getQuoteInfo')
    base_twitter = "https://twitter.com"
    no_quote = (0, 0, "", "")

    quote = tw.find("div", "QuoteTweet-innerContainer")
    if quote is None:
        return no_quote
    try:
        quote_id_str = quote["data-item-id"]
        quote_id = int(quote_id_str)
        # quote.get() returns None for a missing href; the concatenation then
        # raises TypeError, which is treated as "no usable quote".
        quote_url = base_twitter + quote.get("href")
    except (KeyError, TypeError, ValueError):
        return no_quote
    return 1, quote_id, quote_id_str, quote_url
def
getText
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getText')
...
...
@@ -33,25 +101,6 @@ def getText(tw):
return
text
def
getTweet
(
tw
,
mentions
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTweet')
try
:
text
=
getText
(
tw
)
for
i
in
range
(
len
(
mentions
)):
mention
=
f
"@{mentions[i]}"
if
mention
not
in
text
:
text
=
f
"{mention} {text}"
except
:
text
=
getText
(
tw
)
return
text
def getHashtags(text):
    """Get hashtags of tweet

    Args:
        text: Tweet text as ``str``.

    Returns:
        List of every ``#`` followed by one or more word characters, in order
        of appearance (the leading ``#`` is included).
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getHashtags')
    # For str patterns \w is Unicode-aware by default, and the pattern has no
    # cased letters, so neither re.UNICODE nor (?i) changes the result.
    return re.findall(r'#\w+', text)
def
getStat
(
tw
,
_type
):
"""Get stats about Tweet
"""
...
...
@@ -61,42 +110,49 @@ def getStat(tw, _type):
def
getRetweet
(
profile
,
username
,
user
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getRetweet')
if
profile
and
username
.
lower
()
!=
user
:
return
True
def getUser_rt(profile, username, user):
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getUser_rt')
    """Return the retweeting user, or the string ``"None"``.

    ``user`` is returned when ``getRetweet(profile, username, user)`` is
    truthy; otherwise the literal string ``"None"`` is returned.
    """
    return user if getRetweet(profile, username, user) else "None"
if
profile
and
username
.
lower
()
!=
user
.
lower
():
return
1
def
Tweet
(
tw
,
location
,
config
):
"""Create Tweet object
"""
##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet')
t
=
tweet
()
t
.
id
=
tw
.
find
(
"div"
)[
"data-item-id"
]
t
.
id
=
int
(
tw
.
find
(
"div"
)[
"data-item-id"
])
t
.
id_str
=
tw
.
find
(
"div"
)[
"data-item-id"
]
t
.
conversation_id
=
tw
.
find
(
"div"
)[
"data-conversation-id"
]
t
.
datetime
=
int
(
tw
.
find
(
"span"
,
"_timestamp"
)[
"data-time"
])
t
.
datestamp
=
strftime
(
"
%
Y-
%
m-
%
d"
,
localtime
(
t
.
datetime
))
t
.
timestamp
=
strftime
(
"
%
H:
%
M:
%
S"
,
localtime
(
t
.
datetime
))
t
.
user_id
=
tw
.
find
(
"a"
,
"account-group js-account-group js-action-profile js-user-profile-link js-nav"
)[
"data-user-id"
]
t
.
username
=
tw
.
find
(
"span"
,
"username"
)
.
text
.
replace
(
"@"
,
""
)
t
.
user_id
=
int
(
tw
.
find
(
"div"
)[
"data-user-id"
])
t
.
user_id_str
=
tw
.
find
(
"div"
)[
"data-user-id"
]
t
.
username
=
tw
.
find
(
"div"
)[
"data-screen-name"
]
t
.
name
=
tw
.
find
(
"div"
)[
"data-name"
]
t
.
profile_image_url
=
tw
.
find
(
"img"
,
"js-action-profile-avatar"
)
.
get
(
'src'
)
.
replace
(
"_bigger"
,
""
)
t
.
place
=
tw
.
find
(
"a"
,
"js-geo-pivot-link"
)
.
text
.
strip
()
if
tw
.
find
(
"a"
,
"js-geo-pivot-link"
)
else
None
t
.
timezone
=
strftime
(
"
%
Z"
,
localtime
())
for
img
in
tw
.
findAll
(
"img"
,
"Emoji Emoji--forText"
):
img
.
replaceWith
(
img
[
"alt"
])
t
.
mentions
=
getMentions
(
tw
)
t
.
tweet
=
getTweet
(
tw
,
t
.
mentions
)
t
.
tags
=
getTags
(
tw
)
t
.
replies
=
getReplies
(
tw
)
t
.
urls
=
[
link
.
attrs
[
"data-expanded-url"
]
for
link
in
tw
.
find_all
(
'a'
,{
'class'
:
'twitter-timeline-link'
})
if
link
.
has_attr
(
"data-expanded-url"
)]
t
.
photos
=
[
photo_node
.
attrs
[
'data-image-url'
]
for
photo_node
in
tw
.
find_all
(
"div"
,
"AdaptiveMedia-photoContainer"
)]
t
.
tweet
=
getText
(
tw
)
t
.
location
=
location
t
.
hashtags
=
getHashtags
(
t
.
tweet
)
t
.
replies
=
getStat
(
tw
,
"reply"
)
t
.
retweets
=
getStat
(
tw
,
"retweet"
)
t
.
likes
=
getStat
(
tw
,
"favorite"
)
t
.
hashtags
=
[
hashtag
.
text
for
hashtag
in
tw
.
find_all
(
"a"
,
"twitter-hashtag"
)]
t
.
replies
_count
=
getStat
(
tw
,
"reply"
)
t
.
retweets
_count
=
getStat
(
tw
,
"retweet"
)
t
.
likes
_count
=
getStat
(
tw
,
"favorite"
)
t
.
link
=
f
"https://twitter.com/{t.username}/status/{t.id}"
t
.
retweet
=
getRetweet
(
config
.
Profile
,
t
.
username
,
config
.
Username
)
t
.
user_rt
=
getUser_rt
(
config
.
Profile
,
t
.
username
,
config
.
Username
)
return
t
t
.
gif_url
,
t
.
gif_thumb
,
t
.
video_url
,
t
.
video_thumb
=
getRawURLS
(
tw
,
t
.
link
,
config
)
t
.
is_quote_status
,
t
.
quote_id
,
t
.
quote_id_str
,
t
.
quote_url
=
getQuoteInfo
(
tw
)
t
.
is_reply_to
=
int
(
bool
(
tw
.
find
(
"div"
)[
"data-is-reply-to"
]))
if
tw
.
find
(
"div"
)
.
has_attr
(
"data-is-reply-to"
)
else
0
t
.
has_parent_tweet
=
int
(
bool
(
tw
.
find
(
"div"
)[
"data-has-parent-tweet"
]))
if
tw
.
find
(
"div"
)
.
has_attr
(
"data-has-parent-tweet"
)
else
0
t
.
in_reply_to_screen_name
=
""
t
.
in_reply_to_status_id
=
0
t
.
in_reply_to_status_id_str
=
""
t
.
in_reply_to_user_id
=
0
t
.
in_reply_to_user_id_str
=
""
return
t
\ No newline at end of file
twint/user.py
View file @
2cc218cd
...
...
@@ -20,6 +20,10 @@ def inf(ur, _type):
ret
=
group
[
"data-screen-name"
]
elif
_type
==
"private"
:
ret
=
group
[
"data-protected"
]
if
ret
==
'true'
:
ret
=
1
else
:
ret
=
0
return
ret
...
...
@@ -28,18 +32,18 @@ def card(ur, _type):
try
:
ret
=
ur
.
find
(
"p"
,
"ProfileHeaderCard-bio u-dir"
)
.
text
.
replace
(
"
\n
"
,
" "
)
except
:
ret
=
"None"
ret
=
None
elif
_type
==
"location"
:
try
:
ret
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-locationText u-dir"
)
.
text
ret
=
ret
[
15
:]
.
replace
(
"
\n
"
,
" "
)[:
-
10
]
except
:
ret
=
"None"
ret
=
None
elif
_type
==
"url"
:
try
:
ret
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-urlText u-dir"
)
.
find
(
"a"
)[
"title"
]
except
:
ret
=
"None"
ret
=
None
return
ret
...
...
@@ -54,11 +58,13 @@ def convertToInt(x):
"b"
:
1000000000
,
}
try
:
if
','
in
x
:
x
=
x
.
replace
(
','
,
''
)
y
=
int
(
x
)
return
y
except
:
pass
try
:
y
=
float
(
str
(
x
)[:
-
1
])
y
=
y
*
multDict
[
str
(
x
)[
-
1
:]
.
lower
()]
...
...
@@ -79,11 +85,10 @@ def stat(ur, _type):
def
media
(
ur
):
try
:
media_count
=
ur
.
find
(
"a"
,
"PhotoRail-headingWithCount js-nav"
)
.
text
media_count
=
media_count
.
replace
(
"
\n
"
,
""
)[
32
:]
.
split
(
" "
)[
0
]
media_count
=
convertToInt
(
media_count
)
media_count
=
ur
.
find
(
"a"
,
"PhotoRail-headingWithCount js-nav"
)
.
text
.
strip
()
.
split
(
" "
)[
0
]
media_count
=
convertToInt
(
media_count
)
except
:
media_count
=
"0"
media_count
=
0
return
media_count
...
...
@@ -91,11 +96,11 @@ def verified(ur):
try
:
is_verified
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-badges"
)
.
text
if
"Verified account"
in
is_verified
:
is_verified
=
"true"
is_verified
=
1
else
:
is_verified
=
"false"
is_verified
=
0
except
:
is_verified
=
"false"
is_verified
=
0
return
is_verified
...
...
@@ -119,4 +124,5 @@ def User(ur):
u
.
is_private
=
inf
(
ur
,
"private"
)
u
.
is_verified
=
verified
(
ur
)
u
.
avatar
=
ur
.
find
(
"img"
,
"ProfileAvatar-image"
)[
"src"
]
return
u
u
.
background_image
=
ur
.
find
(
'div'
,{
'class'
:
'ProfileCanopy-headerBg'
})
.
find
(
'img'
)
.
get
(
'src'
)
return
u
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment