Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
T
Twint
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nanahira
Twint
Commits
25a9d99d
Commit
25a9d99d
authored
Feb 02, 2019
by
Francesco Poldi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed logger
parent
95bebc68
Changes
12
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
175 additions
and
199 deletions
+175
-199
twint/__init__.py
twint/__init__.py
+20
-9
twint/__version__.py
twint/__version__.py
+1
-1
twint/_logme.py
twint/_logme.py
+0
-24
twint/datelock.py
twint/datelock.py
+2
-4
twint/feed.py
twint/feed.py
+7
-9
twint/format.py
twint/format.py
+5
-7
twint/get.py
twint/get.py
+34
-35
twint/output.py
twint/output.py
+42
-42
twint/run.py
twint/run.py
+41
-39
twint/tweet.py
twint/tweet.py
+7
-9
twint/url.py
twint/url.py
+7
-9
twint/user.py
twint/user.py
+9
-11
No files found.
twint/__init__.py
View file @
25a9d99d
...
...
@@ -2,19 +2,30 @@
TWINT - Twitter Intelligence Tool (formerly known as Tweep).
See wiki on Github for in-depth details.
https://github.com/
haccer
/twint/wiki
https://github.com/
twintproject
/twint/wiki
Licensed under MIT License
Copyright (c) 2018 Cody Zacharias
'''
import
logging
,
os
from
.config
import
Config
from
.
import
run
#import logging
#logger = logging.getLogger()
#handler = logging.FileHandler('twint.log')
#formatter = logging.Formatter(
# '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
#handler.setFormatter(formatter)
#logger.addHandler(handler)
#logger.setLevel(logging.DEBUG)
\ No newline at end of file
_levels
=
{
'info'
:
logging
.
INFO
,
'debug'
:
logging
.
DEBUG
}
_level
=
os
.
getenv
(
'TWINT_DEBUG'
,
'info'
)
_logLevel
=
_levels
[
_level
]
if
_level
==
"debug"
:
logger
=
logging
.
getLogger
()
_output_fn
=
'twint.log'
logger
.
setLevel
(
_logLevel
)
formatter
=
logging
.
Formatter
(
'
%(levelname)
s:
%(asctime)
s:
%(name)
s:
%(message)
s'
)
fileHandler
=
logging
.
FileHandler
(
_output_fn
)
fileHandler
.
setLevel
(
_logLevel
)
fileHandler
.
setFormatter
(
formatter
)
logger
.
addHandler
(
fileHandler
)
twint/__version__.py
View file @
25a9d99d
VERSION
=
(
1
,
2
,
1
)
__version__
=
'.'
.
join
(
map
(
str
,
VERSION
))
__version__
=
'.'
.
join
(
map
(
str
,
VERSION
))
\ No newline at end of file
twint/_logme.py
deleted
100644 → 0
View file @
95bebc68
import
logging
LEVEL
=
logging
.
INFO
class
_logger
:
def
__init__
(
self
,
loggerName
):
self
.
_level
=
LEVEL
self
.
_output_fn
=
'twint.log'
self
.
logger
=
logging
.
getLogger
(
loggerName
)
self
.
logger
.
setLevel
(
self
.
_level
)
self
.
formatter
=
logging
.
Formatter
(
'
%(levelname)
s:
%(asctime)
s:
%(name)
s:
%(message)
s'
)
self
.
fileHandler
=
logging
.
FileHandler
(
self
.
_output_fn
)
self
.
fileHandler
.
setLevel
(
self
.
_level
)
self
.
fileHandler
.
setFormatter
(
self
.
formatter
)
self
.
logger
.
addHandler
(
self
.
fileHandler
)
def
critical
(
self
,
message
):
self
.
logger
.
critical
(
message
)
def
info
(
self
,
message
):
self
.
logger
.
info
(
message
)
def
debug
(
self
,
message
):
self
.
logger
.
debug
(
message
)
\ No newline at end of file
twint/datelock.py
View file @
25a9d99d
import
datetime
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
class
Datelock
:
_until
=
None
...
...
@@ -10,7 +8,7 @@ class Datelock:
_since_def_user
=
None
def
Set
(
Until
,
Since
):
logme
.
debug
(
'
Set'
)
logme
.
debug
(
__name__
+
':
Set'
)
d
=
Datelock
()
if
Until
:
...
...
twint/feed.py
View file @
25a9d99d
...
...
@@ -2,36 +2,34 @@ from bs4 import BeautifulSoup
from
re
import
findall
from
json
import
loads
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
def
Follow
(
response
):
logme
.
debug
(
'
Follow'
)
logme
.
debug
(
__name__
+
':
Follow'
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
follow
=
soup
.
find_all
(
"td"
,
"info fifty screenname"
)
cursor
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
try
:
cursor
=
findall
(
r'cursor=(.*?)">'
,
str
(
cursor
))[
0
]
except
IndexError
:
logme
.
critical
(
'
Follow:IndexError'
)
logme
.
critical
(
__name__
+
':
Follow:IndexError'
)
return
follow
,
cursor
def
Mobile
(
response
):
logme
.
debug
(
'
Mobile'
)
logme
.
debug
(
__name__
+
':
Mobile'
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
tweets
=
soup
.
find_all
(
"span"
,
"metadata"
)
max_id
=
soup
.
find_all
(
"div"
,
"w-button-more"
)
try
:
max_id
=
findall
(
r'max_id=(.*?)">'
,
str
(
max_id
))[
0
]
except
Exception
as
e
:
logme
.
critical
(
'
Mobile:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':
Mobile:'
+
str
(
e
))
return
tweets
,
max_id
def
profile
(
response
):
logme
.
debug
(
'
profile'
)
logme
.
debug
(
__name__
+
':
profile'
)
json_response
=
loads
(
response
)
html
=
json_response
[
"items_html"
]
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
...
...
@@ -40,7 +38,7 @@ def profile(response):
return
feed
,
feed
[
-
1
][
"data-item-id"
]
def
Json
(
response
):
logme
.
debug
(
'
Json'
)
logme
.
debug
(
__name__
+
':
Json'
)
json_response
=
loads
(
response
)
html
=
json_response
[
"items_html"
]
soup
=
BeautifulSoup
(
html
,
"html.parser"
)
...
...
twint/format.py
View file @
25a9d99d
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
def
Tweet
(
config
,
t
):
if
config
.
Format
:
logme
.
debug
(
'
Tweet:Format'
)
logme
.
debug
(
__name__
+
':
Tweet:Format'
)
output
=
config
.
Format
.
replace
(
"{id}"
,
t
.
id_str
)
output
=
output
.
replace
(
"{date}"
,
t
.
datestamp
)
output
=
output
.
replace
(
"{time}"
,
t
.
timestamp
)
...
...
@@ -21,7 +19,7 @@ def Tweet(config, t):
output
=
output
.
replace
(
"{is_retweet}"
,
str
(
t
.
retweet
))
output
=
output
.
replace
(
"{mentions}"
,
str
(
t
.
mentions
))
else
:
logme
.
debug
(
'
Tweet:notFormat'
)
logme
.
debug
(
__name__
+
':
Tweet:notFormat'
)
output
=
f
"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone} "
if
t
.
retweet
==
1
:
...
...
@@ -41,7 +39,7 @@ def Tweet(config, t):
def
User
(
_format
,
u
):
if
_format
:
logme
.
debug
(
'
User:Format'
)
logme
.
debug
(
__name__
+
':
User:Format'
)
output
=
_format
.
replace
(
"{id}"
,
u
.
id
)
output
+=
output
.
replace
(
"{name}"
,
u
.
name
)
output
+=
output
.
replace
(
"{username}"
,
u
.
username
)
...
...
@@ -59,7 +57,7 @@ def User(_format, u):
output
+=
output
.
replace
(
"{verified}"
,
str
(
u
.
is_verified
))
output
+=
output
.
replace
(
"{avatar}"
,
u
.
avatar
)
else
:
logme
.
debug
(
'
User:notFormat'
)
logme
.
debug
(
__name__
+
':
User:notFormat'
)
output
=
f
"{u.id} | {u.name} | @{u.username} | Private: "
output
+=
f
"{u.is_private} | Verified: {u.is_verified} |"
output
+=
f
" Bio: {u.bio} | Location: {u.location} | Url: "
...
...
twint/get.py
View file @
25a9d99d
...
...
@@ -13,14 +13,13 @@ from aiohttp_socks import SocksConnector, SocksVer
from
.
import
url
from
.output
import
Tweets
,
Users
from
.user
import
inf
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
httpproxy
=
None
def
get_connector
(
config
):
logme
.
debug
(
'
get_connector'
)
logme
.
debug
(
__name__
+
':
get_connector'
)
_connector
=
None
if
config
.
Proxy_host
is
not
None
:
if
config
.
Proxy_host
.
lower
()
==
"tor"
:
...
...
@@ -48,12 +47,12 @@ def get_connector(config):
port
=
config
.
Proxy_port
,
rdns
=
True
)
else
:
logme
.
critical
(
'
get_connector:proxy-port-type-error'
)
logme
.
critical
(
__name__
+
':
get_connector:proxy-port-type-error'
)
print
(
"Error: Please specify --proxy-host, --proxy-port, and --proxy-type"
)
sys
.
exit
(
1
)
else
:
if
config
.
Proxy_port
or
config
.
Proxy_type
:
logme
.
critical
(
'
get_connector:proxy-host-arg-error'
)
logme
.
critical
(
__name__
+
':
get_connector:proxy-host-arg-error'
)
print
(
"Error: Please specify --proxy-host, --proxy-port, and --proxy-type"
)
sys
.
exit
(
1
)
...
...
@@ -61,31 +60,31 @@ def get_connector(config):
async
def
RequestUrl
(
config
,
init
,
headers
=
[]):
logme
.
debug
(
'
RequestUrl'
)
logme
.
debug
(
__name__
+
':
RequestUrl'
)
_connector
=
get_connector
(
config
)
if
config
.
Profile
:
if
config
.
Profile_full
:
logme
.
debug
(
'
RequestUrl:Profile_full'
)
logme
.
debug
(
__name__
+
':
RequestUrl:Profile_full'
)
_url
=
await
url
.
MobileProfile
(
config
.
Username
,
init
)
response
=
await
MobileRequest
(
_url
,
connector
=
_connector
)
else
:
logme
.
debug
(
'
RequestUrl:notProfile_full'
)
logme
.
debug
(
__name__
+
':
RequestUrl:notProfile_full'
)
_url
=
await
url
.
Profile
(
config
.
Username
,
init
)
response
=
await
Request
(
_url
,
connector
=
_connector
,
headers
=
headers
)
elif
config
.
TwitterSearch
:
logme
.
debug
(
'
RequestUrl:TwitterSearch'
)
logme
.
debug
(
__name__
+
':
RequestUrl:TwitterSearch'
)
_url
,
params
=
await
url
.
Search
(
config
,
init
)
response
=
await
Request
(
_url
,
params
=
params
,
connector
=
_connector
,
headers
=
headers
)
else
:
if
config
.
Following
:
logme
.
debug
(
'
RequestUrl:Following'
)
logme
.
debug
(
__name__
+
':
RequestUrl:Following'
)
_url
=
await
url
.
Following
(
config
.
Username
,
init
)
elif
config
.
Followers
:
logme
.
debug
(
'
RequestUrl:Followers'
)
logme
.
debug
(
__name__
+
':
RequestUrl:Followers'
)
_url
=
await
url
.
Followers
(
config
.
Username
,
init
)
else
:
logme
.
debug
(
'
RequestUrl:Favorites'
)
logme
.
debug
(
__name__
+
':
RequestUrl:Favorites'
)
_url
=
await
url
.
Favorites
(
config
.
Username
,
init
)
response
=
await
MobileRequest
(
_url
,
connector
=
_connector
)
...
...
@@ -97,51 +96,51 @@ async def RequestUrl(config, init, headers = []):
async
def
MobileRequest
(
url
,
**
options
):
connector
=
options
.
get
(
"connector"
)
if
connector
:
logme
.
debug
(
'
MobileRequest:Connector'
)
logme
.
debug
(
__name__
+
':
MobileRequest:Connector'
)
async
with
aiohttp
.
ClientSession
(
connector
=
connector
)
as
session
:
return
await
Response
(
session
,
url
)
logme
.
debug
(
'
MobileRequest:notConnector'
)
logme
.
debug
(
__name__
+
':
MobileRequest:notConnector'
)
async
with
aiohttp
.
ClientSession
()
as
session
:
return
await
Response
(
session
,
url
)
def
ForceNewTorIdentity
(
config
):
logme
.
debug
(
'
ForceNewTorIdentity'
)
logme
.
debug
(
__name__
+
':
ForceNewTorIdentity'
)
try
:
tor_c
=
socket
.
create_connection
((
'127.0.0.1'
,
config
.
Tor_control_port
))
tor_c
.
send
(
'AUTHENTICATE "{}"
\r\n
SIGNAL NEWNYM
\r\n
'
.
format
(
config
.
Tor_control_password
)
.
encode
())
response
=
tor_c
.
recv
(
1024
)
if
response
!=
b
'250 OK
\r\n
250 OK
\r\n
'
:
sys
.
stderr
.
write
(
'Unexpected response from Tor control port: {}
\n
'
.
format
(
response
))
logme
.
critical
(
'
ForceNewTorIdentity:unexpectedResponse'
)
logme
.
critical
(
__name__
+
':
ForceNewTorIdentity:unexpectedResponse'
)
except
Exception
as
e
:
logme
.
debug
(
'
ForceNewTorIdentity:errorConnectingTor'
)
logme
.
debug
(
__name__
+
':
ForceNewTorIdentity:errorConnectingTor'
)
sys
.
stderr
.
write
(
'Error connecting to Tor control port: {}
\n
'
.
format
(
repr
(
e
)))
sys
.
stderr
.
write
(
'If you want to rotate Tor ports automatically - enable Tor control port
\n
'
)
async
def
Request
(
url
,
connector
=
None
,
params
=
[],
headers
=
[]):
if
connector
:
logme
.
debug
(
'
Request:Connector'
)
logme
.
debug
(
__name__
+
':
Request:Connector'
)
async
with
aiohttp
.
ClientSession
(
connector
=
connector
,
headers
=
headers
)
as
session
:
return
await
Response
(
session
,
url
,
params
)
logme
.
debug
(
'
Request:notConnector'
)
logme
.
debug
(
__name__
+
':
Request:notConnector'
)
async
with
aiohttp
.
ClientSession
()
as
session
:
return
await
Response
(
session
,
url
,
params
)
async
def
Response
(
session
,
url
,
params
=
[]):
logme
.
debug
(
'
Response'
)
logme
.
debug
(
__name__
+
':
Response'
)
with
timeout
(
30
):
async
with
session
.
get
(
url
,
ssl
=
False
,
params
=
params
,
proxy
=
httpproxy
)
as
response
:
return
await
response
.
text
()
async
def
RandomUserAgent
():
logme
.
debug
(
'
RandomUserAgent'
)
logme
.
debug
(
__name__
+
':
RandomUserAgent'
)
url
=
"https://fake-useragent.herokuapp.com/browsers/0.1.8"
r
=
await
Request
(
url
)
browsers
=
loads
(
r
)[
'browsers'
]
return
random
.
choice
(
browsers
[
random
.
choice
(
list
(
browsers
))])
async
def
Username
(
_id
):
logme
.
debug
(
'
Username'
)
logme
.
debug
(
__name__
+
':
Username'
)
url
=
f
"https://twitter.com/intent/user?user_id={_id}&lang=en"
r
=
await
Request
(
url
)
soup
=
BeautifulSoup
(
r
,
"html.parser"
)
...
...
@@ -149,7 +148,7 @@ async def Username(_id):
return
soup
.
find
(
"a"
,
"fn url alternate-context"
)[
"href"
]
.
replace
(
"/"
,
""
)
async
def
Tweet
(
url
,
config
,
conn
):
logme
.
debug
(
'
Tweet'
)
logme
.
debug
(
__name__
+
':
Tweet'
)
try
:
response
=
await
Request
(
url
)
soup
=
BeautifulSoup
(
response
,
"html.parser"
)
...
...
@@ -158,11 +157,11 @@ async def Tweet(url, config, conn):
tweets
=
soup
.
find_all
(
"div"
,
"tweet"
)
await
Tweets
(
tweets
,
location
,
config
,
conn
,
url
)
except
Exception
as
e
:
logme
.
critical
(
'
Tweet:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':
Tweet:'
+
str
(
e
))
print
(
str
(
e
)
+
" [x] get.Tweet"
)
async
def
User
(
url
,
config
,
conn
,
user_id
=
False
):
logme
.
debug
(
'
User'
)
logme
.
debug
(
__name__
+
':
User'
)
_connector
=
get_connector
(
config
)
try
:
response
=
await
Request
(
url
,
connector
=
_connector
)
...
...
@@ -170,16 +169,16 @@ async def User(url, config, conn, user_id = False):
if
user_id
:
return
int
(
inf
(
soup
,
"id"
))
except
Exception
as
e
:
logme
.
critical
(
'
User:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':
User:'
+
str
(
e
))
print
(
str
(
e
)
+
" [x] get.User"
)
def
Limit
(
Limit
,
count
):
logme
.
critical
(
'
Limit'
)
logme
.
debug
(
__name__
+
':
Limit'
)
if
Limit
is
not
None
and
count
>=
int
(
Limit
):
return
True
async
def
Multi
(
feed
,
config
,
conn
):
logme
.
debug
(
'
Multi'
)
logme
.
debug
(
__name__
+
':
Multi'
)
count
=
0
try
:
with
concurrent
.
futures
.
ThreadPoolExecutor
(
max_workers
=
20
)
as
executor
:
...
...
@@ -188,34 +187,34 @@ async def Multi(feed, config, conn):
for
tweet
in
feed
:
count
+=
1
if
config
.
Favorites
or
config
.
Profile_full
:
logme
.
debug
(
'
Multi:Favorites-profileFull'
)
logme
.
debug
(
__name__
+
':
Multi:Favorites-profileFull'
)
link
=
tweet
.
find
(
"a"
)[
"href"
]
url
=
f
"https://twitter.com{link}&lang=en"
elif
config
.
User_full
:
logme
.
debug
(
'
Multi:userFull'
)
logme
.
debug
(
__name__
+
':
Multi:userFull'
)
username
=
tweet
.
find
(
"a"
)[
"name"
]
url
=
f
"http://twitter.com/{username}?lang=en"
else
:
logme
.
debug
(
'
Multi:else-url'
)
logme
.
debug
(
__name__
+
':
Multi:else-url'
)
link
=
tweet
.
find
(
"a"
,
"tweet-timestamp js-permalink js-nav js-tooltip"
)[
"href"
]
url
=
f
"https://twitter.com{link}?lang=en"
if
config
.
User_full
:
logme
.
debug
(
'
Multi:user-full-Run'
)
logme
.
debug
(
__name__
+
':
Multi:user-full-Run'
)
futures
.
append
(
loop
.
run_in_executor
(
executor
,
await
User
(
url
,
config
,
conn
)))
else
:
logme
.
debug
(
'
Multi:notUser-full-Run'
)
logme
.
debug
(
__name__
+
':
Multi:notUser-full-Run'
)
futures
.
append
(
loop
.
run_in_executor
(
executor
,
await
Tweet
(
url
,
config
,
conn
)))
logme
.
debug
(
'
Multi:asyncioGather'
)
logme
.
debug
(
__name__
+
':
Multi:asyncioGather'
)
await
asyncio
.
gather
(
*
futures
)
except
Exception
as
e
:
# TODO: fix error not error
# print(str(e) + " [x] get.Multi")
# will return "'NoneType' object is not callable"
# but still works
logme
.
critical
(
'
Multi:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':
Multi:'
+
str
(
e
))
pass
return
count
twint/output.py
View file @
25a9d99d
...
...
@@ -4,9 +4,8 @@ from . import format, get
from
.tweet
import
Tweet
from
.user
import
User
from
.storage
import
db
,
elasticsearch
,
write
,
panda
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
follow_object
=
{}
tweets_object
=
[]
...
...
@@ -18,41 +17,41 @@ author_list.pop()
_follow_list
=
[]
def
clean_follow_list
():
logme
.
debug
(
'
clean_follow_list'
)
logme
.
debug
(
__name__
+
':
clean_follow_list'
)
global
_follow_list
_follow_list
=
[]
def
datecheck
(
datestamp
,
config
):
logme
.
debug
(
'
datecheck'
)
logme
.
debug
(
__name__
+
':
datecheck'
)
if
config
.
Since
and
config
.
Until
:
logme
.
debug
(
'
datecheck:dateRangeTrue'
)
logme
.
debug
(
__name__
+
':
datecheck:dateRangeTrue'
)
d
=
int
(
datestamp
.
replace
(
"-"
,
""
))
s
=
int
(
config
.
Since
.
replace
(
"-"
,
""
))
if
d
<
s
:
return
False
logme
.
debug
(
'
datecheck:dateRangeFalse'
)
logme
.
debug
(
__name__
+
':
datecheck:dateRangeFalse'
)
return
True
def
is_tweet
(
tw
):
try
:
tw
[
"data-item-id"
]
logme
.
debug
(
'
is_tweet:True'
)
logme
.
debug
(
__name__
+
':
is_tweet:True'
)
return
True
except
:
logme
.
critical
(
'
is_tweet:False'
)
logme
.
critical
(
__name__
+
':
is_tweet:False'
)
return
False
def
_output
(
obj
,
output
,
config
,
**
extra
):
logme
.
debug
(
'
_output'
)
logme
.
debug
(
__name__
+
':
_output'
)
if
config
.
Lowercase
:
if
isinstance
(
obj
,
str
):
logme
.
debug
(
'
_output:Lowercase:username'
)
logme
.
debug
(
__name__
+
':
_output:Lowercase:username'
)
obj
=
obj
.
lower
()
elif
obj
.
__class__
.
__name__
==
"user"
:
logme
.
debug
(
'
_output:Lowercase:user'
)
logme
.
debug
(
__name__
+
':
_output:Lowercase:user'
)
pass
elif
obj
.
__class__
.
__name__
==
"tweet"
:
logme
.
debug
(
'
_output:Lowercase:tweet'
)
logme
.
debug
(
__name__
+
':
_output:Lowercase:tweet'
)
obj
.
username
=
obj
.
username
.
lower
()
author_list
.
update
({
obj
.
username
})
for
i
in
range
(
len
(
obj
.
mentions
)):
...
...
@@ -67,45 +66,45 @@ def _output(obj, output, config, **extra):
if
config
.
Store_csv
:
try
:
write
.
Csv
(
obj
,
config
)
logme
.
debug
(
'
_output:CSV'
)
logme
.
debug
(
__name__
+
':
_output:CSV'
)
except
Exception
as
e
:
logme
.
critical
(
'
_output:CSV:Error:'
+
str
(
e
))
logme
.
critical
(
__name__
+
':
_output:CSV:Error:'
+
str
(
e
))
print
(
str
(
e
)
+
" [x] output._output"
)
elif
config
.
Store_json
:
write
.
Json
(
obj
,
config
)
logme
.
debug
(
'
_output:JSON'
)
logme
.
debug
(
__name__
+
':
_output:JSON'
)
else
:
write
.
Text
(
output
,
config
.
Output
)
logme
.
debug
(
'
_output:Text'
)
logme
.
debug
(
__name__
+
':
_output:Text'
)
if
config
.
Pandas
and
obj
.
type
==
"user"
:
logme
.
debug
(
'
_output:Pandas+user'
)
logme
.
debug
(
__name__
+
':
_output:Pandas+user'
)
panda
.
update
(
obj
,
config
)
if
extra
.
get
(
"follow_list"
):
logme
.
debug
(
'
_output:follow_list'
)
logme
.
debug
(
__name__
+
':
_output:follow_list'
)
follow_object
.
username
=
config
.
Username
follow_object
.
action
=
config
.
Following
*
"following"
+
config
.
Followers
*
"followers"
follow_object
.
users
=
_follow_list
panda
.
update
(
follow_object
,
config
.
Essid
)
if
config
.
Elasticsearch
:
logme
.
debug
(
'
_output:Elasticsearch'
)
logme
.
debug
(
__name__
+
':
_output:Elasticsearch'
)
print
(
""
,
end
=
"."
,
flush
=
True
)
else
:
if
not
config
.
Hide_output
:
try
:
print
(
output
)
except
UnicodeEncodeError
:
logme
.
critical
(
'
_output:UnicodeEncodeError'
)
logme
.
critical
(
__name__
+
':
_output:UnicodeEncodeError'
)
print
(
"unicode error [x] output._output"
)
async
def
checkData
(
tweet
,
location
,
config
,
conn
):
logme
.
debug
(
'
checkData'
)
logme
.
debug
(
__name__
+
':
checkData'
)
copyright
=
tweet
.
find
(
"div"
,
"StreamItemContent--withheld"
)
if
copyright
is
None
and
is_tweet
(
tweet
):
tweet
=
Tweet
(
tweet
,
location
,
config
)
if
not
tweet
.
datestamp
:
logme
.
critical
(
'
checkData:hiddenTweetFound'
)
logme
.
critical
(
__name__
+
':
checkData:hiddenTweetFound'
)
print
(
"[x] Hidden tweet found, account suspended due to violation of TOS"
)
return
...
...
@@ -113,56 +112,57 @@ async def checkData(tweet, location, config, conn):
output
=
format
.
Tweet
(
config
,
tweet
)
if
config
.
Database
:
logme
.
debug
(
'
checkData:Database'
)
logme
.
debug
(
__name__
+
':
checkData:Database'
)
db
.
tweets
(
conn
,
tweet
,
config
)
if
config
.
Pandas
:
logme
.
debug
(
'
checkData:Pandas'
)
logme
.
debug
(
__name__
+
':
checkData:Pandas'
)
panda
.
update
(
tweet
,
config
)
if
config
.
Store_object
:
logme
.
debug
(
'
checkData:Store_object'
)
logme
.
debug
(
__name__
+
':
checkData:Store_object'
)
tweets_object
.
append
(
tweet
)
if
config
.
Elasticsearch
:
logme
.
debug
(
'
checkData:Elasticsearch'
)
logme
.
debug
(
__name__
+
':
checkData:Elasticsearch'
)
elasticsearch
.
Tweet
(
tweet
,
config
)
_output
(
tweet
,
output
,
config
)
logme
.
critical
(
'checkData:copyrightedTweet'
)
else
:
logme
.
critical
(
__name__
+
':checkData:copyrightedTweet'
)
async
def
Tweets
(
tweets
,
location
,
config
,
conn
,
url
=
''
):
logme
.
debug
(
'
Tweets'
)
logme
.
debug
(
__name__
+
':
Tweets'
)
if
(
config
.
Profile_full
or
config
.
Location
)
and
config
.
Get_replies
:
logme
.
debug
(
'
Tweets:full+loc+replies'
)
logme
.
debug
(
__name__
+
':
Tweets:full+loc+replies'
)
for
tw
in
tweets
:
await
checkData
(
tw
,
location
,
config
,
conn
)
elif
config
.
Favorites
or
config
.
Profile_full
or
config
.
Location
:
logme
.
debug
(
'
Tweets:fav+full+loc'
)
logme
.
debug
(
__name__
+
':
Tweets:fav+full+loc'
)
for
tw
in
tweets
:
if
tw
[
'data-item-id'
]
==
url
.
split
(
'?'
)[
0
]
.
split
(
'/'
)[
-
1
]:
await
checkData
(
tw
,
location
,
config
,
conn
)
elif
config
.
TwitterSearch
:
logme
.
debug
(
'
Tweets:TwitterSearch'
)
await
checkData
(
tweets
,
location
,
config
,
conn
)
logme
.
debug
(
__name__
+
':
Tweets:TwitterSearch'
)
await
checkData
(
tweets
,
location
,
config
,
conn
)
else
:
logme
.
debug
(
'
Tweets:else'
)
logme
.
debug
(
__name__
+
':
Tweets:else'
)
if
int
(
tweets
[
"data-user-id"
])
==
config
.
User_id
:
await
checkData
(
tweets
,
location
,
config
,
conn
)
async
def
Users
(
u
,
config
,
conn
):
logme
.
debug
(
'
User'
)
logme
.
debug
(
__name__
+
':
User'
)
global
user_object
user
=
User
(
u
)
output
=
format
.
User
(
config
.
Format
,
user
)
if
config
.
Database
:
logme
.
debug
(
'
User:Database'
)
logme
.
debug
(
__name__
+
':
User:Database'
)
db
.
user
(
conn
,
config
,
user
)
if
config
.
Elasticsearch
:
logme
.
debug
(
'
User:Elasticsearch'
)
logme
.
debug
(
__name__
+
':
User:Elasticsearch'
)
_save_date
=
user
.
join_date
_save_time
=
user
.
join_time
user
.
join_date
=
str
(
datetime
.
strptime
(
user
.
join_date
,
"
%
d
%
b
%
Y"
))
.
split
()[
0
]
...
...
@@ -172,32 +172,32 @@ async def Users(u, config, conn):
user
.
join_time
=
_save_time
if
config
.
Store_object
:
logme
.
debug
(
'
User:Store_object'
)
logme
.
debug
(
__name__
+
':
User:Store_object'
)
user_object
.
append
(
user
)
# twint.user.user
_output
(
user
,
output
,
config
)
async
def
Username
(
username
,
config
,
conn
):
logme
.
debug
(
'
Username'
)
logme
.
debug
(
__name__
+
':
Username'
)
global
follow_object
follow_var
=
config
.
Following
*
"following"
+
config
.
Followers
*
"followers"
if
config
.
Database
:
logme
.
debug
(
'
Username:Database'
)
logme
.
debug
(
__name__
+
':
Username:Database'
)
db
.
follow
(
conn
,
config
.
Username
,
config
.
Followers
,
username
)
if
config
.
Elasticsearch
:
logme
.
debug
(
'
Username:Elasticsearch'
)
logme
.
debug
(
__name__
+
':
Username:Elasticsearch'
)
elasticsearch
.
Follow
(
username
,
config
)
if
config
.
Store_object
or
config
.
Pandas
:
logme
.
debug
(
'
Username:object+pandas'
)
logme
.
debug
(
__name__
+
':
Username:object+pandas'
)
try
:
_
=
follow_object
[
config
.
Username
][
follow_var
]
except
KeyError
:
follow_object
.
update
({
config
.
Username
:
{
follow_var
:
[]}})
follow_object
[
config
.
Username
][
follow_var
]
.
append
(
username
)
if
config
.
Pandas_au
:
logme
.
debug
(
'
Username:object+pandas+au'
)
logme
.
debug
(
__name__
+
':
Username:object+pandas+au'
)
panda
.
update
(
follow_object
[
config
.
Username
],
config
)
_output
(
username
,
username
,
config
,
follow_list
=
_follow_list
)
twint/run.py
View file @
25a9d99d
This diff is collapsed.
Click to expand it.
twint/tweet.py
View file @
25a9d99d
from
time
import
strftime
,
localtime
import
json
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
class
tweet
:
"""Define Tweet class
...
...
@@ -16,7 +14,7 @@ class tweet:
def
getMentions
(
tw
):
"""Extract ment from tweet
"""
logme
.
debug
(
'
getMentions'
)
logme
.
debug
(
__name__
+
':
getMentions'
)
try
:
mentions
=
tw
[
"data-mentions"
]
.
split
(
" "
)
except
:
...
...
@@ -27,7 +25,7 @@ def getMentions(tw):
def
getQuoteURL
(
tw
):
"""Extract quote from tweet
"""
logme
.
debug
(
'
getQuoteURL'
)
logme
.
debug
(
__name__
+
':
getQuoteURL'
)
base_twitter
=
"https://twitter.com"
quote_url
=
""
try
:
...
...
@@ -41,7 +39,7 @@ def getQuoteURL(tw):
def
getText
(
tw
):
"""Replace some text
"""
logme
.
debug
(
'
getText'
)
logme
.
debug
(
__name__
+
':
getText'
)
text
=
tw
.
find
(
"p"
,
"tweet-text"
)
.
text
text
=
text
.
replace
(
"
\n
"
,
" "
)
text
=
text
.
replace
(
"http"
,
" http"
)
...
...
@@ -52,21 +50,21 @@ def getText(tw):
def
getStat
(
tw
,
_type
):
"""Get stats about Tweet
"""
logme
.
debug
(
'
getStat'
)
logme
.
debug
(
__name__
+
':
getStat'
)
st
=
f
"ProfileTweet-action--{_type} u-hiddenVisually"
return
tw
.
find
(
"span"
,
st
)
.
find
(
"span"
)[
"data-tweet-stat-count"
]
def
getRetweet
(
profile
,
username
,
user
):
"""Get Retweet
"""
logme
.
debug
(
'
getRetweet'
)
logme
.
debug
(
__name__
+
':
getRetweet'
)
if
profile
and
username
.
lower
()
!=
user
.
lower
():
return
1
def
Tweet
(
tw
,
location
,
config
):
"""Create Tweet object
"""
logme
.
debug
(
'
Tweet'
)
logme
.
debug
(
__name__
+
':
Tweet'
)
t
=
tweet
()
t
.
id
=
int
(
tw
[
"data-item-id"
])
t
.
id_str
=
tw
[
"data-item-id"
]
...
...
twint/url.py
View file @
25a9d99d
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
mobile
=
"https://mobile.twitter.com"
base
=
"https://twitter.com/i"
async
def
Favorites
(
username
,
init
):
logme
.
debug
(
'
Favorites'
)
logme
.
debug
(
__name__
+
':
Favorites'
)
url
=
f
"{mobile}/{username}/favorites?lang=en"
if
init
!=
-
1
:
...
...
@@ -14,7 +12,7 @@ async def Favorites(username, init):
return
url
async
def
Followers
(
username
,
init
):
logme
.
debug
(
'
Followers'
)
logme
.
debug
(
__name__
+
':
Followers'
)
url
=
f
"{mobile}/{username}/followers?lang=en"
if
init
!=
-
1
:
...
...
@@ -23,7 +21,7 @@ async def Followers(username, init):
return
url
async
def
Following
(
username
,
init
):
logme
.
debug
(
'
Following'
)
logme
.
debug
(
__name__
+
':
Following'
)
url
=
f
"{mobile}/{username}/following?lang=en"
if
init
!=
-
1
:
...
...
@@ -32,7 +30,7 @@ async def Following(username, init):
return
url
async
def
MobileProfile
(
username
,
init
):
logme
.
debug
(
'
MobileProfile'
)
logme
.
debug
(
__name__
+
':
MobileProfile'
)
url
=
f
"{mobile}/{username}?lang=en"
if
init
!=
-
1
:
...
...
@@ -41,7 +39,7 @@ async def MobileProfile(username, init):
return
url
async
def
Profile
(
username
,
init
):
logme
.
debug
(
'
Profile'
)
logme
.
debug
(
__name__
+
':
Profile'
)
url
=
f
"{base}/profiles/show/{username}/timeline/tweets?include_"
url
+=
"available_features=1&lang=en&include_entities=1"
url
+=
"&include_new_items_bar=true"
...
...
@@ -52,7 +50,7 @@ async def Profile(username, init):
return
url
async
def
Search
(
config
,
init
):
logme
.
debug
(
'
Search'
)
logme
.
debug
(
__name__
+
':
Search'
)
url
=
f
"{base}/search/timeline"
params
=
[
(
'f'
,
'tweets'
),
...
...
twint/user.py
View file @
25a9d99d
from
.
import
_logme
logme
=
_logme
.
_logger
(
__name__
)
import
logging
as
logme
class
user
:
type
=
"user"
...
...
@@ -9,7 +7,7 @@ class user:
pass
def
inf
(
ur
,
_type
):
logme
.
debug
(
'
inf'
)
logme
.
debug
(
__name__
+
':
inf'
)
try
:
group
=
ur
.
find
(
"div"
,
"user-actions btn-group not-following"
)
if
group
==
None
:
...
...
@@ -33,7 +31,7 @@ def inf(ur, _type):
return
ret
def
card
(
ur
,
_type
):
logme
.
debug
(
'
card'
)
logme
.
debug
(
__name__
+
':
card'
)
if
_type
==
"bio"
:
try
:
ret
=
ur
.
find
(
"p"
,
"ProfileHeaderCard-bio u-dir"
)
.
text
.
replace
(
"
\n
"
,
" "
)
...
...
@@ -54,12 +52,12 @@ def card(ur, _type):
return
ret
def
join
(
ur
):
logme
.
debug
(
'
join'
)
logme
.
debug
(
__name__
+
':
join'
)
jd
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-joinDateText js-tooltip u-dir"
)[
"title"
]
return
jd
.
split
(
" - "
)
def
convertToInt
(
x
):
logme
.
debug
(
'
contertToInt'
)
logme
.
debug
(
__name__
+
':
contertToInt'
)
multDict
=
{
"k"
:
1000
,
"m"
:
1000000
,
...
...
@@ -83,7 +81,7 @@ def convertToInt(x):
return
0
def
stat
(
ur
,
_type
):
logme
.
debug
(
'
stat'
)
logme
.
debug
(
__name__
+
':
stat'
)
_class
=
f
"ProfileNav-item ProfileNav-item--{_type}"
stat
=
ur
.
find
(
"li"
,
_class
)
try
:
...
...
@@ -93,7 +91,7 @@ def stat(ur, _type):
return
r
def
media
(
ur
):
logme
.
debug
(
'
media'
)
logme
.
debug
(
__name__
+
':
media'
)
try
:
media_count
=
ur
.
find
(
"a"
,
"PhotoRail-headingWithCount js-nav"
)
.
text
.
strip
()
.
split
(
" "
)[
0
]
media_count
=
convertToInt
(
media_count
)
...
...
@@ -103,7 +101,7 @@ def media(ur):
return
media_count
def
verified
(
ur
):
logme
.
debug
(
'
verified'
)
logme
.
debug
(
__name__
+
':
verified'
)
try
:
is_verified
=
ur
.
find
(
"span"
,
"ProfileHeaderCard-badges"
)
.
text
if
"Verified account"
in
is_verified
:
...
...
@@ -116,7 +114,7 @@ def verified(ur):
return
is_verified
def
User
(
ur
):
logme
.
debug
(
'
User'
)
logme
.
debug
(
__name__
+
':
User'
)
u
=
user
()
for
img
in
ur
.
findAll
(
"img"
,
"Emoji Emoji--forText"
):
img
.
replaceWith
(
img
[
"alt"
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment