Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
T
Twint
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nanahira
Twint
Commits
d4672ee0
Commit
d4672ee0
authored
Sep 27, 2018
by
Francesco Poldi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added logging options, fixes retweets
parent
700d30d6
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
82 additions
and
2 deletions
+82
-2
twint/__init__.py
twint/__init__.py
+9
-0
twint/datelock.py
twint/datelock.py
+3
-0
twint/feed.py
twint/feed.py
+6
-0
twint/format.py
twint/format.py
+5
-0
twint/get.py
twint/get.py
+12
-0
twint/output.py
twint/output.py
+9
-0
twint/run.py
twint/run.py
+19
-2
twint/tweet.py
twint/tweet.py
+10
-0
twint/url.py
twint/url.py
+9
-0
No files found.
twint/__init__.py
View file @
d4672ee0
...
...
@@ -9,3 +9,12 @@ Copyright (c) 2018 Cody Zacharias
'''
from
.config
import
Config
from
.
import
run
#import logging
#logger = logging.getLogger()
#handler = logging.FileHandler('twint.log')
#formatter = logging.Formatter(
# '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
#handler.setFormatter(formatter)
#logger.addHandler(handler)
#logger.setLevel(logging.DEBUG)
\ No newline at end of file
twint/datelock.py
View file @
d4672ee0
import
datetime
#import logging
class Datelock:
    """Plain holder for the date bounds used while scraping.

    All three attributes start out as ``None`` at class level; code that
    creates an instance is expected to assign the real values afterwards.
    """

    # Upper bound, lower bound, and a third slot for the "since" value;
    # presumably the last one records a user-supplied default — TODO confirm
    # against the code that fills it in.
    _until = _since = _since_def_user = None
def
Set
(
Until
,
Since
):
#logging.info("[<] " + str(datetime.datetime.now()) + ':: datelock+Set')
d
=
Datelock
()
if
Until
:
...
...
twint/feed.py
View file @
d4672ee0
from
bs4
import
BeautifulSoup
from
re
import
findall
from
json
import
loads
#import logging
#from datetime import datetime
def
Follow
(
response
):
#logging.info("[<] " + str(datetime.now()) + ':: feed+Follow')
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
follow
=
soup
.
find_all
(
"td"
,
"info fifty screenname"
)
cursor
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
...
...
@@ -14,6 +17,7 @@ def Follow(response):
return
follow
,
cursor
def
Mobile
(
response
):
#logging.info("[<] " + str(datetime.now()) + ':: feed+Mobile')
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
tweets
=
soup
.
find_all
(
"span"
,
"metadata"
)
max_id
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
...
...
@@ -25,6 +29,7 @@ def Mobile(response):
return
tweets
,
max_id
def
profile
(
response
):
#logging.info("[<] " + str(datetime.now()) + ':: feed+profile')
json_response
=
loads
(
response
)
html
=
json_response
[
"items_html"
]
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
...
...
@@ -33,6 +38,7 @@ def profile(response):
return
feed
,
feed
[
-
1
][
"data-item-id"
]
def
Json
(
response
):
#logging.info("[<] " + str(datetime.now()) + ':: feed+Json')
json_response
=
loads
(
response
)
html
=
json_response
[
"items_html"
]
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
...
...
twint/format.py
View file @
d4672ee0
#import logging
#from datetime import datetime
def
Tweet
(
config
,
t
):
#logging.info("[<] " + str(datetime.now()) + ':: format+Tweet')
if
config
.
Format
:
output
=
config
.
Format
.
replace
(
"{id}"
,
t
.
id
)
output
=
output
.
replace
(
"{date}"
,
t
.
datestamp
)
...
...
@@ -35,6 +39,7 @@ def Tweet(config, t):
return
output
def
User
(
_format
,
u
):
#logging.info("[<] " + str(datetime.now()) + ':: format+User')
if
_format
:
output
=
_format
.
replace
(
"{id}"
,
u
.
id
)
output
+=
output
.
replace
(
"{name}"
,
u
.
name
)
...
...
twint/get.py
View file @
d4672ee0
from
async_timeout
import
timeout
from
datetime
import
datetime
from
bs4
import
BeautifulSoup
import
sys
import
aiohttp
...
...
@@ -10,7 +11,10 @@ from aiohttp_socks import SocksConnector, SocksVer
from
.
import
url
from
.output
import
Tweets
,
Users
#import logging
async
def
RequestUrl
(
config
,
init
):
#logging.info("[<] " + str(datetime.now()) + ':: get+requestURL')
_connector
=
None
if
config
.
Proxy_host
is
not
None
:
if
config
.
Proxy_host
.
lower
()
==
"tor"
:
...
...
@@ -66,6 +70,7 @@ async def RequestUrl(config, init):
return
response
async
def
MobileRequest
(
url
,
**
options
):
#logging.info("[<] " + str(datetime.now()) + ':: get+MobileRequest')
ua
=
{
'User-Agent'
:
'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/0.8.12'
}
connector
=
options
.
get
(
"_connector"
)
if
connector
:
...
...
@@ -75,6 +80,7 @@ async def MobileRequest(url, **options):
return
await
Response
(
session
,
url
)
async
def
Request
(
url
,
**
options
):
#logging.info("[<] " + str(datetime.now()) + ':: get+Request')
connector
=
options
.
get
(
"_connector"
)
if
connector
:
async
with
aiohttp
.
ClientSession
(
connector
=
connector
)
as
session
:
...
...
@@ -83,11 +89,13 @@ async def Request(url, **options):
return
await
Response
(
session
,
url
)
async def Response(session, url):
    """Request ``url`` through ``session`` and return the body as text.

    The request and the body read both happen inside a 30-second
    ``timeout`` block.

    Args:
        session: Object whose ``get(url, ssl=False)`` returns an async
            context manager (the callers in this file pass an
            ``aiohttp.ClientSession``).
        url: Address to request.

    Returns:
        The awaited result of ``response.text()``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: get+Response')
    # Inside a coroutine the timeout must be entered with ``async with``;
    # the plain ``with timeout(...)`` form is deprecated by async_timeout.
    async with timeout(30):
        # NOTE(review): ssl=False presumably turns off certificate
        # verification for this request — confirm that is intended.
        async with session.get(url, ssl=False) as response:
            return await response.text()
async
def
Username
(
_id
):
#logging.info("[<] " + str(datetime.now()) + ':: get+Username')
url
=
f
"https://twitter.com/intent/user?user_id={_id}&lang=en"
r
=
await
Request
(
url
)
soup
=
BeautifulSoup
(
r
,
"html.parser"
)
...
...
@@ -95,6 +103,7 @@ async def Username(_id):
return
soup
.
find
(
"a"
,
"fn url alternate-context"
)[
"href"
]
.
replace
(
"/"
,
""
)
async
def
Tweet
(
url
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: get+Tweet')
try
:
response
=
await
Request
(
url
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
...
...
@@ -106,6 +115,7 @@ async def Tweet(url, config, conn):
pass
async
def
User
(
url
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: get+User')
try
:
response
=
await
Request
(
url
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
...
...
@@ -114,10 +124,12 @@ async def User(url, config, conn):
pass
def Limit(Limit, count):
    """Report whether ``count`` has reached the configured limit.

    Args:
        Limit: Maximum number of items, or ``None`` for "no limit". The
            value is passed through ``int()``, so a numeric string works too.
        count: Number of items gathered so far.

    Returns:
        ``True`` when a limit is set and ``count >= int(Limit)``,
        otherwise ``False`` (previously the function fell off the end and
        returned ``None`` in that case).
    """
    #logging.info("[<] " + str(datetime.now()) + ':: get+Limit')
    # The parameter shadows the function name inside this body; it is kept
    # as-is so keyword callers are not broken.
    return Limit is not None and count >= int(Limit)
async
def
Multi
(
feed
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: get+Multi')
count
=
0
try
:
with
concurrent
.
futures
.
ThreadPoolExecutor
(
max_workers
=
20
)
as
executor
:
...
...
twint/output.py
View file @
d4672ee0
...
...
@@ -4,6 +4,8 @@ from .user import User
from
datetime
import
datetime
from
.storage
import
db
,
elasticsearch
,
write
,
panda
#import logging
follow_object
=
{}
tweets_object
=
[]
...
...
@@ -12,10 +14,12 @@ user_object = []
_follow_list
=
[]
def clean_follow_list():
    """Rebind the module-level ``_follow_list`` to a new, empty list."""
    #logging.info("[<] " + str(datetime.now()) + ':: output+clean_follow_list')
    global _follow_list
    # A fresh list is bound rather than clearing the old one in place, so
    # anything still holding the previous list keeps its contents.
    _follow_list = list()
def
datecheck
(
datestamp
,
config
):
#logging.info("[<] " + str(datetime.now()) + ':: output+datecheck')
if
config
.
Since
and
config
.
Until
:
d
=
int
(
datestamp
.
replace
(
"-"
,
""
))
s
=
int
(
config
.
Since
.
replace
(
"-"
,
""
))
...
...
@@ -24,6 +28,7 @@ def datecheck(datestamp, config):
return
True
def
is_tweet
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: output+is_tweet')
try
:
tw
.
find
(
"div"
)[
"data-item-id"
]
return
True
...
...
@@ -31,6 +36,7 @@ def is_tweet(tw):
return
False
def
_output
(
obj
,
output
,
config
,
**
extra
):
#logging.info("[<] " + str(datetime.now()) + ':: output+_output')
if
config
.
Lowercase
:
obj
.
username
=
obj
.
username
.
lower
()
for
i
in
range
(
len
(
obj
.
mentions
)):
...
...
@@ -68,6 +74,7 @@ def _output(obj, output, config, **extra):
print
(
"unicode error"
)
async
def
Tweets
(
tw
,
location
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: output+Tweets')
copyright
=
tw
.
find
(
"div"
,
"StreamItemContent--withheld"
)
if
copyright
is
None
and
is_tweet
(
tw
):
tweet
=
Tweet
(
tw
,
location
,
config
)
...
...
@@ -86,6 +93,7 @@ async def Tweets(tw, location, config, conn):
_output
(
tweet
,
output
,
config
)
async
def
Users
(
u
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: output+Users')
global
user_object
user
=
User
(
u
)
...
...
@@ -109,6 +117,7 @@ async def Users(u, config, conn):
_output
(
user
,
output
,
config
)
async
def
Username
(
username
,
config
,
conn
):
#logging.info("[<] " + str(datetime.now()) + ':: output+Username')
global
follow_object
follow_var
=
config
.
Following
*
"following"
+
config
.
Followers
*
"followers"
...
...
twint/run.py
View file @
d4672ee0
from
.
import
datelock
,
feed
,
get
,
output
,
verbose
,
storage
from
asyncio
import
get_event_loop
from
datetime
import
timedelta
from
datetime
import
timedelta
,
datetime
from
.storage
import
db
#import logging
class
Twint
:
def
__init__
(
self
,
config
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+__init__')
if
config
.
Resume
is
not
None
and
config
.
TwitterSearch
:
self
.
init
=
f
"TWEET-{config.Resume}-0"
else
:
...
...
@@ -29,6 +32,7 @@ class Twint:
self
.
config
.
Timedelta
=
(
self
.
d
.
_until
-
self
.
d
.
_since
)
.
days
async
def
Feed
(
self
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+Feed')
response
=
await
get
.
RequestUrl
(
self
.
config
,
self
.
init
)
if
self
.
config
.
Debug
:
print
(
response
,
file
=
open
(
"twint-last-request.log"
,
"w"
,
encoding
=
"utf-8"
))
...
...
@@ -50,6 +54,7 @@ class Twint:
pass
async
def
follow
(
self
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+follow')
await
self
.
Feed
()
if
self
.
config
.
User_full
:
self
.
count
+=
await
get
.
Multi
(
self
.
feed
,
self
.
config
,
self
.
conn
)
...
...
@@ -60,10 +65,12 @@ class Twint:
await
output
.
Username
(
username
,
self
.
config
,
self
.
conn
)
async def favorite(self):
    """Load the next feed page and add its processed count to ``self.count``.

    Awaits ``self.Feed()`` first, then hands ``self.feed``, ``self.config``
    and ``self.conn`` to ``get.Multi`` and adds whatever it returns to the
    running total.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+Twint+favorite')
    await self.Feed()
    self.count = self.count + await get.Multi(self.feed, self.config, self.conn)
async
def
profile
(
self
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+profile')
await
self
.
Feed
()
if
self
.
config
.
Profile_full
:
self
.
count
+=
await
get
.
Multi
(
self
.
feed
,
self
.
config
,
self
.
conn
)
...
...
@@ -73,6 +80,7 @@ class Twint:
await
output
.
Tweets
(
tweet
,
""
,
self
.
config
,
self
.
conn
)
async
def
tweets
(
self
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+tweets')
await
self
.
Feed
()
if
self
.
config
.
Location
:
self
.
count
+=
await
get
.
Multi
(
self
.
feed
,
self
.
config
,
self
.
conn
)
...
...
@@ -82,6 +90,7 @@ class Twint:
await
output
.
Tweets
(
tweet
,
""
,
self
.
config
,
self
.
conn
)
async
def
main
(
self
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main')
if
self
.
config
.
User_id
is
not
None
:
self
.
config
.
Username
=
await
get
.
Username
(
self
.
config
.
User_id
)
...
...
@@ -96,6 +105,7 @@ class Twint:
self
.
d
.
_until
=
self
.
d
.
_until
-
_days
self
.
feed
=
[
-
1
]
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit1')
if
get
.
Limit
(
self
.
config
.
Limit
,
self
.
count
):
self
.
d
.
_until
=
self
.
d
.
_until
-
_days
self
.
feed
=
[
-
1
]
...
...
@@ -113,6 +123,7 @@ class Twint:
else
:
break
#logging.info("[<] " + str(datetime.now()) + ':: run+Twint+main+CallingGetLimit2')
if
get
.
Limit
(
self
.
config
.
Limit
,
self
.
count
):
break
...
...
@@ -120,13 +131,16 @@ class Twint:
verbose
.
Count
(
self
.
count
,
self
.
config
)
def run(config):
    """Run ``Twint(config).main()`` to completion on the current event loop.

    Args:
        config: Configuration object handed straight to ``Twint``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+run')
    loop = get_event_loop()
    scraper = Twint(config)
    loop.run_until_complete(scraper.main())
def Favorites(config):
    """Switch ``config.Favorites`` on and start a run with that config.

    Args:
        config: Configuration object; mutated in place before ``run``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+Favorites')
    config.Favorites = True
    run(config)
def
Followers
(
config
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Followers')
output
.
clean_follow_list
()
config
.
Followers
=
True
config
.
Following
=
False
...
...
@@ -138,6 +152,7 @@ def Followers(config):
storage
.
panda
.
clean
()
def
Following
(
config
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Following')
output
.
clean_follow_list
()
config
.
Following
=
True
config
.
Followers
=
False
...
...
@@ -149,10 +164,12 @@ def Following(config):
storage
.
panda
.
clean
()
def Profile(config):
    """Switch ``config.Profile`` on and start a run with that config.

    Args:
        config: Configuration object; mutated in place before ``run``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: run+Profile')
    config.Profile = True
    run(config)
def
Search
(
config
):
#logging.info("[<] " + str(datetime.now()) + ':: run+Search')
config
.
TwitterSearch
=
True
config
.
Following
=
False
config
.
Followers
=
False
...
...
twint/tweet.py
View file @
d4672ee0
from
time
import
strftime
,
localtime
import
re
#from datetime import datetime
#import logging
class
tweet
:
"""Define Tweet class
...
...
@@ -10,6 +12,7 @@ class tweet:
pass
def
getMentions
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getMentions')
"""Extract mentions from tweet
"""
try
:
...
...
@@ -20,6 +23,7 @@ def getMentions(tw):
return
mentions
def
getText
(
tw
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getText')
"""Replace some text
"""
text
=
tw
.
find
(
"p"
,
"tweet-text"
)
.
text
...
...
@@ -30,6 +34,7 @@ def getText(tw):
return
text
def
getTweet
(
tw
,
mentions
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getTweet')
try
:
text
=
getText
(
tw
)
for
i
in
range
(
len
(
mentions
)):
...
...
@@ -42,6 +47,7 @@ def getTweet(tw, mentions):
return
text
def
getHashtags
(
text
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getHashtags')
"""Get hashtags of tweet
"""
return
re
.
findall
(
r'(?i)\#\w+'
,
text
,
flags
=
re
.
UNICODE
)
...
...
@@ -49,14 +55,17 @@ def getHashtags(text):
def getStat(tw, _type):
    """Get stats about Tweet

    Finds the ``span`` in ``tw`` matching
    ``ProfileTweet-action--{_type} u-hiddenVisually``, then reads the
    ``data-tweet-stat-count`` item from the ``span`` found inside it.

    Args:
        tw: Object exposing ``find`` (presumably a BeautifulSoup tag for
            one tweet — confirm against the caller).
        _type: Name fragment inserted into the class string.

    Returns:
        The ``data-tweet-stat-count`` value of the inner ``span``.
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getStat')
    css_class = f"ProfileTweet-action--{_type} u-hiddenVisually"
    action = tw.find("span", css_class)
    counter = action.find("span")
    return counter["data-tweet-stat-count"]
def getRetweet(profile, username, user):
    """Return whether ``username`` differs from ``user`` on a profile scrape.

    Presumably this flags a tweet as a retweet: on a profile timeline, an
    entry whose author is not the profile owner — confirm with the caller.

    Args:
        profile: Truthy when the comparison should be made at all.
        username: Name to compare; lower-cased before the comparison. Only
            touched when ``profile`` is truthy.
        user: Name compared against, used as given.

    Returns:
        ``True`` when ``profile`` is truthy and ``username.lower()`` is not
        equal to ``user``; ``False`` otherwise (previously ``None``).
    """
    #logging.info("[<] " + str(datetime.now()) + ':: tweet+getRetweet')
    # ``and`` short-circuits, so ``username`` is not dereferenced when
    # ``profile`` is falsy, exactly as before.
    return bool(profile) and username.lower() != user
def
getUser_rt
(
profile
,
username
,
user
):
#logging.info("[<] " + str(datetime.now()) + ':: tweet+getUser_rt')
"""Get username that retweeted
"""
if
getRetweet
(
profile
,
username
,
user
):
...
...
@@ -69,6 +78,7 @@ def getUser_rt(profile, username, user):
def
Tweet
(
tw
,
location
,
config
):
"""Create Tweet object
"""
##logging.info("[<] " + str(datetime.now()) + ':: tweet+Tweet')
t
=
tweet
()
t
.
id
=
tw
.
find
(
"div"
)[
"data-item-id"
]
t
.
datetime
=
int
(
tw
.
find
(
"span"
,
"_timestamp"
)[
"data-time"
])
...
...
twint/url.py
View file @
d4672ee0
#from datetime import datetime
#import logging
mobile
=
"https://mobile.twitter.com"
base
=
"https://twitter.com/i"
async
def
Favorites
(
username
,
init
):
#logging.info("[<] " + str(datetime.now()) + ':: url+Favorites')
url
=
f
"{mobile}/{username}/favorites?lang=en"
if
init
!=
-
1
:
...
...
@@ -10,6 +14,7 @@ async def Favorites(username, init):
return
url
async
def
Followers
(
username
,
init
):
#logging.info("[<] " + str(datetime.now()) + ':: url+Followers')
url
=
f
"{mobile}/{username}/followers?lang=en"
if
init
!=
-
1
:
...
...
@@ -18,6 +23,7 @@ async def Followers(username, init):
return
url
async
def
Following
(
username
,
init
):
#logging.info("[<] " + str(datetime.now()) + ':: url+Following')
url
=
f
"{mobile}/{username}/following?lang=en"
if
init
!=
-
1
:
...
...
@@ -26,6 +32,7 @@ async def Following(username, init):
return
url
async
def
MobileProfile
(
username
,
init
):
#logging.info("[<] " + str(datetime.now()) + ':: url+MobileProfile')
url
=
f
"{mobile}/{username}?lang=en"
if
init
!=
-
1
:
...
...
@@ -34,6 +41,7 @@ async def MobileProfile(username, init):
return
url
async
def
Profile
(
username
,
init
):
#logging.info("[<] " + str(datetime.now()) + ':: url+Profile')
url
=
f
"{base}/profiles/show/{username}/timeline/tweets?include_"
url
+=
"available_features=1&lang=en&include_entities=1"
url
+=
"&include_new_items_bar=true"
...
...
@@ -44,6 +52,7 @@ async def Profile(username, init):
return
url
async
def
Search
(
config
,
init
):
#logging.info("[<] " + str(datetime.now()) + ':: url+Search')
url
=
f
"{base}/search/timeline?f=tweets&vertical=default&lang=en"
url
+=
"&include_available_features=1&include_entities=1&"
url
+=
f
"reset_error_state=false&src=typd&qf=off&max_position={init}&q="
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment