hydrus/hydrus/client/networking/ClientNetworkingJobs.py

1680 lines
53 KiB
Python
Raw Normal View History

2019-01-09 22:59:03 +00:00
import io
2020-05-20 21:36:02 +00:00
import os
import requests
import threading
import traceback
import time
import urllib
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusNetworking
from hydrus.core import HydrusThreading
from hydrus.core import HydrusText
2018-04-18 22:10:15 +00:00
2020-07-29 20:52:44 +00:00
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientData
from hydrus.client.networking import ClientNetworkingContexts
from hydrus.client.networking import ClientNetworkingDomain
2020-04-16 00:09:42 +00:00
# Optional dependency probe: cloudscraper is only used for CloudFlare solving.
# 'except Exception' (rather than a bare except) still tolerates any import-time
# failure of the optional packages, but no longer swallows KeyboardInterrupt or
# SystemExit raised while the import is running.
try:
    
    import cloudscraper
    
    CLOUDSCRAPER_OK = True
    
    try:
        
        # help pyinstaller
        import pyparsing
        
        PYPARSING_OK = True
        
    except Exception:
        
        PYPARSING_OK = False
        
    
except Exception:
    
    CLOUDSCRAPER_OK = False
    PYPARSING_OK = False
2020-04-16 00:09:42 +00:00
2018-04-18 22:10:15 +00:00
def ConvertStatusCodeAndDataIntoExceptionInfo( status_code, data, is_hydrus_service = False ):
    """Turn a failed HTTP response into an exception plus displayable error text.

    status_code -- the numeric HTTP status of the response.
    data -- the raw response body; it is decoded as text with a utf-8 first guess.
    is_hydrus_service -- when True, a 503 maps to ServerBusyException instead of
        ShouldReattemptNetworkException.

    Returns ( exception_instance, error_text ). The exception is constructed but
    not raised here.
    """
    
    ( error_text, encoding ) = HydrusText.NonFailingUnicodeDecode( data, 'utf-8' )
    
    # 304 and 404 bodies are never dumped to the log, however long they are
    print_long_error_text = status_code not in ( 304, 404 )
    
    exact_status_lookup = {
        304 : HydrusExceptions.NotModifiedException,
        400 : HydrusExceptions.BadRequestException,
        401 : HydrusExceptions.MissingCredentialsException,
        403 : HydrusExceptions.InsufficientCredentialsException,
        404 : HydrusExceptions.NotFoundException,
        409 : HydrusExceptions.ConflictException,
        419 : HydrusExceptions.SessionException,
        426 : HydrusExceptions.NetworkVersionException,
        429 : HydrusExceptions.BandwidthException,
        509 : HydrusExceptions.BandwidthException,
        502 : HydrusExceptions.ShouldReattemptNetworkException
    }
    
    if status_code in exact_status_lookup:
        
        eclass = exact_status_lookup[ status_code ]
        
    elif status_code == 503:
        
        eclass = HydrusExceptions.ServerBusyException if is_hydrus_service else HydrusExceptions.ShouldReattemptNetworkException
        
    elif status_code >= 500:
        
        eclass = HydrusExceptions.ServerException
        
    else:
        
        eclass = HydrusExceptions.NetworkException
        
    
    if print_long_error_text and len( error_text ) > 1024:
        
        # the log gets up to 512K characters, the caller gets only the first 256
        large_chunk = error_text[ : 512 * 1024 ]
        
        HydrusData.DebugPrint( large_chunk )
        
        error_text = os.linesep.join( (
            "The server's error text was too long to display. The first part follows, while a larger chunk has been written to the log.",
            large_chunk[ : 256 ]
        ) )
        
    
    e = eclass( '{}: {}'.format( status_code, error_text ) )
    
    return ( e, error_text )
class NetworkJob( object ):
2018-10-31 21:41:14 +00:00
WILLING_TO_WAIT_ON_INVALID_LOGIN = True
2018-04-18 22:10:15 +00:00
IS_HYDRUS_SERVICE = False
2020-05-13 19:03:16 +00:00
IS_IPFS_SERVICE = False
2018-04-18 22:10:15 +00:00
def __init__( self, method, url, body = None, referral_url = None, temp_path = None ):
    """Set up a network job for one request.

    method -- HTTP method string; the rest of the class special-cases 'GET' and 'POST'.
    url -- the URL to request.
    body -- optional request body; a str is encoded to utf-8 bytes.
    referral_url -- optional referral URL stored for the request.
    temp_path -- optional path; when set, a successful response is written to
        this file rather than to the in-memory buffer.
    """
    
    if isinstance( body, str ):
        
        body = body.encode( 'utf-8' )
        
    
    self.engine = None
    
    self._lock = threading.Lock()
    
    # what to request
    self._method = method
    self._url = url
    
    self._max_connection_attempts_allowed = 5
    
    self._domain = ClientNetworkingDomain.ConvertURLIntoDomain( self._url )
    self._second_level_domain = ClientNetworkingDomain.ConvertURLIntoSecondLevelDomain( self._url )
    
    self._body = body
    self._referral_url = referral_url
    self._temp_path = temp_path
    
    self._files = None
    self._for_login = False
    
    # retry bookkeeping
    self._current_connection_attempt_number = 1
    
    self._we_tried_cloudflare_once = False
    
    self._additional_headers = {}
    
    self._creation_time = HydrusData.GetNow()
    
    self._bandwidth_tracker = HydrusNetworking.BandwidthTracker()
    
    # timestamps before which the job is waiting/asleep
    self._connection_error_wake_time = 0
    self._serverside_bandwidth_wake_time = 0
    
    self._wake_time = 0
    
    # response state
    self._content_type = None
    
    self._encoding = 'utf-8'
    self._encoding_confirmed = False
    
    self._stream_io = io.BytesIO()
    
    self._error_exception = Exception( 'Exception not initialised.' ) # PyLint hint, wew
    self._error_exception = None
    self._error_text = None
    
    # lifecycle flags
    self._is_done_event = threading.Event()
    
    self._is_started = False
    self._is_done = False
    self._is_cancelled = False
    
    self._gallery_token_name = None
    self._gallery_token_consumed = False
    
    self._bandwidth_manual_override = False
    self._bandwidth_manual_override_delayed_timestamp = None
    
    self._last_time_ongoing_bandwidth_failed = 0
    
    self._status_text = 'initialising\u2026'
    self._num_bytes_read = 0
    self._num_bytes_to_read = 1
    
    self._file_import_options = None
    
    # these two read self._domain and self._second_level_domain, so they go last
    self._network_contexts = self._GenerateNetworkContexts()
    
    ( self._session_network_context, self._login_network_context ) = self._GenerateSpecificNetworkContexts()
2018-05-09 20:23:00 +00:00
def _CanReattemptConnection( self ):
2019-07-03 22:49:27 +00:00
return self._current_connection_attempt_number <= self._max_connection_attempts_allowed
2018-05-09 20:23:00 +00:00
2018-04-18 22:10:15 +00:00
def _CanReattemptRequest( self ):
if self._method == 'GET':
max_attempts_allowed = 5
elif self._method == 'POST':
max_attempts_allowed = 1
return self._current_connection_attempt_number <= max_attempts_allowed
def _GenerateNetworkContexts( self ):
    """Return the list of network contexts for this job: the global context, then one domain context per applicable domain."""
    
    contexts = [ ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT ]
    
    for applicable_domain in ClientNetworkingDomain.ConvertDomainIntoAllApplicableDomains( self._domain ):
        
        contexts.append( ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, applicable_domain ) )
        
    
    return contexts
def _GenerateSpecificNetworkContexts( self ):
    """Return ( session_network_context, login_network_context ) for this job's URL."""
    
    # we always store cookies in the larger session (even if the cookie itself refers to a subdomain in the session object)
    # but we can login to a specific subdomain
    
    make_domain_context = lambda domain: ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, domain )
    
    session_context = make_domain_context( self._second_level_domain )
    login_context = make_domain_context( self._domain )
    
    return ( session_context, login_context )
def _SendRequestAndGetResponse( self ):
    """Assemble headers, fetch the session for this job, and send the request.

    Returns the streaming response object from session.request. The headers
    start from the domain manager's headers for this job's network contexts,
    then get the hydrus User-Agent (hydrus/IPFS service jobs only), the
    referer, and any additional headers layered on top.
    """
    
    with self._lock:
        
        network_contexts = list( self._network_contexts )
        
    
    headers = self.engine.domain_manager.GetHeaders( network_contexts )
    
    with self._lock:
        
        method = self._method
        url = self._url
        data = self._body
        files = self._files
        
        if self.IS_HYDRUS_SERVICE or self.IS_IPFS_SERVICE:
            
            headers[ 'User-Agent' ] = 'hydrus client/' + str( HC.NETWORK_VERSION )
            
        
        referral_url = self.engine.domain_manager.GetReferralURL( self._url, self._referral_url )
        
        if HG.network_report_mode:
            
            HydrusData.ShowText( 'Network Jobs Referral URLs for {}:{}Given: {}{}Used: {}'.format( self._url, os.linesep, self._referral_url, os.linesep, referral_url ) )
            
        
        if referral_url is not None:
            
            try:
                
                # only checking whether the url can be encoded as latin-1 as-is
                referral_url.encode( 'latin-1' )
                
            except UnicodeEncodeError:
                
                # quick and dirty way to quote this url when it comes here with full unicode chars. not perfect, but does the job
                referral_url = urllib.parse.quote( referral_url, "!#$%&'()*+,/:;=?@[]~" )
                
                if HG.network_report_mode:
                    
                    HydrusData.ShowText( 'Network Jobs Quoted Referral URL for {}:{}{}'.format( self._url, os.linesep, referral_url ) )
                    
                
            
            headers[ 'referer' ] = referral_url
            
        
        # job-specific headers win over everything set above
        headers.update( self._additional_headers )
        
        self._status_text = 'sending request\u2026'
        
        session_network_context = self._session_network_context
        
    
    session = self.engine.session_manager.GetSession( session_network_context )
    
    connect_timeout = HG.client_controller.new_options.GetInteger( 'network_timeout' )
    
    timeouts = ( connect_timeout, connect_timeout * 6 )
    
    return session.request( method, url, data = data, files = files, headers = headers, stream = True, timeout = timeouts )
def _IsCancelled( self ):
if self._is_cancelled:
return True
2019-07-31 22:01:02 +00:00
if HG.model_shutdown:
2018-04-18 22:10:15 +00:00
return True
return False
def _IsDone( self ):
if self._is_done:
return True
2019-07-31 22:01:02 +00:00
if HG.model_shutdown or HydrusThreading.IsThreadShuttingDown():
2018-04-18 22:10:15 +00:00
return True
return False
def _ObeysBandwidth( self ):
if self._bandwidth_manual_override:
return False
if self._bandwidth_manual_override_delayed_timestamp is not None and HydrusData.TimeHasPassed( self._bandwidth_manual_override_delayed_timestamp ):
return False
if self._method == 'POST':
return False
if self._for_login:
return False
return True
def _OngoingBandwidthOK( self ):
    """Ask the bandwidth manager whether an in-progress download may continue.

    A failed check is remembered by its timestamp, so repeat calls within the
    same GetNow() value return False without asking again.
    """
    
    now = HydrusData.GetNow()
    
    if now == self._last_time_ongoing_bandwidth_failed: # it won't have changed, so no point spending any cpu checking
        
        return False
        
    
    can_continue = self.engine.bandwidth_manager.CanContinueDownload( self._network_contexts )
    
    if not can_continue:
        
        self._last_time_ongoing_bandwidth_failed = now
        
    
    return can_continue
def _ReadResponse( self, response, stream_dest, max_allowed = None ):
    """Stream the response body into stream_dest, tracking and validating byte counts.

    response -- a streaming response exposing .headers, .iter_content and .raw.tell().
    stream_dest -- writable binary destination; each chunk is written to it.
    max_allowed -- optional byte limit for this kind of job.

    Raises NetworkException if the declared or actual size exceeds max_allowed,
    or if more data arrives than content-length promised;
    ShouldReattemptNetworkException if less data arrives than content-length
    promised; ShutdownException if the view is shutting down. Returns early,
    without error, if the job is cancelled mid-read.
    """
    
    with self._lock:
        
        # This method may run more than once per job (reattempts). The per-chunk delta below is
        # 'raw bytes read for THIS response' minus self._num_bytes_read, so the counter has to
        # start from zero for every response or the delta goes negative and the size checks
        # compare against stale totals from the previous attempt.
        self._num_bytes_read = 0
        
        if self._content_type is not None and self._content_type in HC.mime_enum_lookup:
            
            mime = HC.mime_enum_lookup[ self._content_type ]
            
        else:
            
            mime = None
            
        
        if 'content-length' in response.headers:
            
            self._num_bytes_to_read = int( response.headers[ 'content-length' ] )
            
            if max_allowed is not None and self._num_bytes_to_read > max_allowed:
                
                raise HydrusExceptions.NetworkException( 'The url was apparently ' + HydrusData.ToHumanBytes( self._num_bytes_to_read ) + ' but the max network size for this type of job is ' + HydrusData.ToHumanBytes( max_allowed ) + '!' )
                
            
            if self._file_import_options is not None:
                
                # the server told us the full size up front, so this is checked as a complete size
                is_complete_file_size = True
                
                self._file_import_options.CheckNetworkDownload( mime, self._num_bytes_to_read, is_complete_file_size )
                
            
        else:
            
            self._num_bytes_to_read = None
            
        
    
    num_bytes_read_is_accurate = True
    
    for chunk in response.iter_content( chunk_size = 65536 ):
        
        if self._IsCancelled():
            
            return
            
        
        stream_dest.write( chunk )
        
        total_bytes_read = response.raw.tell()
        
        if total_bytes_read == 0:
            
            # this seems to occur when the response is chunked transfer encoding (note, no Content-Length)
            # there's no great way to track raw bytes read in this case. the iter_content chunk can be unzipped from that
            # nonetheless, requests does raise ChunkedEncodingError if it stops early, so not a huge deal to miss here, just slightly off bandwidth tracking
            
            num_bytes_read_is_accurate = False
            
            chunk_num_bytes = len( chunk )
            
            self._num_bytes_read += chunk_num_bytes
            
        else:
            
            chunk_num_bytes = total_bytes_read - self._num_bytes_read
            
            self._num_bytes_read = total_bytes_read
            
        
        with self._lock:
            
            if self._num_bytes_to_read is not None and num_bytes_read_is_accurate and self._num_bytes_read > self._num_bytes_to_read:
                
                raise HydrusExceptions.NetworkException( 'Too much data: Was expecting {} but server continued responding!'.format( HydrusData.ToHumanBytes( self._num_bytes_to_read ) ) )
                
            
            if max_allowed is not None and self._num_bytes_read > max_allowed:
                
                raise HydrusExceptions.NetworkException( 'The url exceeded the max network size for this type of job, which is ' + HydrusData.ToHumanBytes( max_allowed ) + '!' )
                
            
            if self._file_import_options is not None:
                
                # mid-download, so the running total is only a partial size
                is_complete_file_size = False
                
                self._file_import_options.CheckNetworkDownload( mime, self._num_bytes_read, is_complete_file_size )
                
            
        
        # these run outside the lock: the bandwidth wait sleeps
        self._ReportDataUsed( chunk_num_bytes )
        self._WaitOnOngoingBandwidth()
        
        if HG.view_shutdown:
            
            raise HydrusExceptions.ShutdownException()
            
        
    
    if self._num_bytes_to_read is not None and num_bytes_read_is_accurate and self._num_bytes_read < self._num_bytes_to_read:
        
        raise HydrusExceptions.ShouldReattemptNetworkException( 'Incomplete response: Was expecting {} but actually got {} !'.format( HydrusData.ToHumanBytes( self._num_bytes_to_read ), HydrusData.ToHumanBytes( self._num_bytes_read ) ) )
2018-04-18 22:10:15 +00:00
def _ReportDataUsed( self, num_bytes ):
self._bandwidth_tracker.ReportDataUsed( num_bytes )
self.engine.bandwidth_manager.ReportDataUsed( self._network_contexts, num_bytes )
def _SetCancelled( self ):
self._is_cancelled = True
self._SetDone()
def _SetError( self, e, error ):
    """Store the exception and its error text on the job, then mark the job done.

    In network report mode the exception and text are also shown to the user.
    """
    
    ( self._error_exception, self._error_text ) = ( e, error )
    
    if HG.network_report_mode:
        
        HydrusData.ShowText( 'Network error should follow:' )
        HydrusData.ShowException( e )
        HydrusData.ShowText( error )
        
    
    self._SetDone()
def _SetDone( self ):
self._is_done = True
self._is_done_event.set()
def _Sleep( self, seconds ):
    """Set the wake time to the given number of seconds after the current time."""
    
    now = HydrusData.GetNow()
    
    self._wake_time = now + seconds
2020-04-16 00:09:42 +00:00
def _SolveCloudFlare( self, response ):
    """Inspect a failed response for a CloudFlare block and try to obtain clearance cookies.

    Does nothing if cloudscraper is unavailable or its inspection of the
    response errors. Raises CloudFlareException for a firewall block or an
    unsolvable challenge. After storing fresh tokens as session cookies, raises
    ShouldReattemptNetworkException so the caller retries the request.
    """
    
    if not CLOUDSCRAPER_OK:
        
        return
        
    
    try:
        
        scraper_class = cloudscraper.CloudScraper
        
        is_firewall = scraper_class.is_Firewall_Blocked( response )
        
        # the captcha test has had two names across cloudscraper versions; the first one present is used
        is_captcha = False
        
        for captcha_test_name in ( 'is_reCaptcha_Challenge', 'is_Captcha_Challenge' ):
            
            if hasattr( scraper_class, captcha_test_name ):
                
                is_captcha = getattr( scraper_class, captcha_test_name )( response )
                
                break
                
            
        
        is_attemptable = is_captcha or scraper_class.is_IUAM_Challenge( response )
        
    except Exception as e:
        
        HydrusData.Print( 'cloudflarescraper had an error looking at "{}" response: {}'.format( self._url, str( e ) ) )
        
        HydrusData.PrintException( e )
        
        return
        
    
    if is_firewall:
        
        raise HydrusExceptions.CloudFlareException( 'It looks like the site has Firewall-Blocked your IP or IP range with CloudFlare.' )
        
    
    if not is_attemptable:
        
        return
        
    
    try:
        
        with self._lock:
            
            network_contexts = list( self._network_contexts )
            session_network_context = self._session_network_context
            
        
        headers = self.engine.domain_manager.GetHeaders( network_contexts )
        
        if 'User-Agent' not in headers:
            
            raise HydrusExceptions.CloudFlareException( 'No User-Agent set for hydrus!' )
            
        
        ( cf_tokens, user_agent ) = cloudscraper.get_tokens( self._url, browser = { 'custom' : headers[ 'User-Agent' ] } )
        
        session = self.engine.session_manager.GetSession( session_network_context )
        
        # snapshot the old cf cookies first, then clear them, so the jar is not edited while it is iterated
        stale_cf_cookies = [ cookie for cookie in session.cookies if cookie.name.startswith( '__cf' ) ]
        
        for stale_cookie in stale_cf_cookies:
            
            session.cookies.clear( stale_cookie.domain, stale_cookie.path, stale_cookie.name )
            
        
        cookie_domain = '.{}'.format( ClientNetworkingDomain.ConvertURLIntoSecondLevelDomain( self._url ) )
        cookie_path = '/'
        cookie_expires = HydrusData.GetNow() + 30 * 86400
        cookie_rest = { 'HttpOnly' : None, 'SameSite' : 'None' }
        
        for ( name, value ) in cf_tokens.items():
            
            ClientNetworkingDomain.AddCookieToSession( session, name, value, cookie_domain, cookie_path, cookie_expires, secure = True, rest = cookie_rest )
            
        
        self.engine.session_manager.SetDirty()
        
    except Exception as e:
        
        # the captcha-provider exception has also had two names; int is a stand-in that no exception can match
        e_type_test = int
        
        for exception_name in ( 'CloudflareReCaptchaProvider', 'CloudflareCaptchaProvider' ):
            
            if hasattr( cloudscraper.exceptions, exception_name ):
                
                e_type_test = getattr( cloudscraper.exceptions, exception_name )
                
                break
                
            
        
        if isinstance( e, e_type_test ):
            
            message = 'The page had a captcha, and hydrus does not yet plug cloudscraper into a captcha-solving service.'
            
        else:
            
            message = str( e )
            
        
        HydrusData.PrintException( e )
        
        raise HydrusExceptions.CloudFlareException( 'Looks like an unsolvable CloudFlare issue: {}'.format( message ) )
        
    
    raise HydrusExceptions.ShouldReattemptNetworkException( 'CloudFlare needed solving.' )
2019-06-19 22:08:48 +00:00
def _WaitOnConnectionError( self, status_text ):
    """Block until the connection-error wake time passes or the job is cancelled.

    The wait is ( attempt number - 1 ) times the 'connection_error_wait_time'
    option. The status text is refreshed with a countdown about once a second.
    """
    
    wait_unit = HG.client_controller.new_options.GetInteger( 'connection_error_wait_time' )
    
    num_previous_attempts = self._current_connection_attempt_number - 1
    
    self._connection_error_wake_time = HydrusData.GetNow() + ( num_previous_attempts * wait_unit )
    
    while True:
        
        # the wake time is re-read every pass since it can be zeroed from outside to end the wait
        if HydrusData.TimeHasPassed( self._connection_error_wake_time ) or self._IsCancelled():
            
            break
            
        
        with self._lock:
            
            self._status_text = status_text + ' - retrying in {}'.format( ClientData.TimestampToPrettyTimeDelta( self._connection_error_wake_time ) )
            
        
        time.sleep( 1 )
2018-04-18 22:10:15 +00:00
def _WaitOnOngoingBandwidth( self ):
    """Sleep in 0.1 second steps until ongoing bandwidth is OK or the job is cancelled."""
    
    while True:
        
        if self._OngoingBandwidthOK() or self._IsCancelled():
            
            break
            
        
        time.sleep( 0.1 )
2019-11-28 01:11:46 +00:00
def _WaitOnServersideBandwidth( self, status_text ):
    """Block until the serverside-bandwidth wake time passes or the job is cancelled.

    The wait is ( attempt number - 1 ) times the 'serverside_bandwidth_wait_time'
    option. The status text is refreshed with a countdown about once a second.
    """
    
    # 429 or 509 response from server. basically means 'I'm under big load mate'
    # a future version of this could def talk to domain manager and add a temp delay so other network jobs can be informed
    
    wait_unit = HG.client_controller.new_options.GetInteger( 'serverside_bandwidth_wait_time' )
    
    num_previous_attempts = self._current_connection_attempt_number - 1
    
    self._serverside_bandwidth_wake_time = HydrusData.GetNow() + ( num_previous_attempts * wait_unit )
    
    while True:
        
        # the wake time is re-read every pass since it can be zeroed from outside to end the wait
        if HydrusData.TimeHasPassed( self._serverside_bandwidth_wake_time ) or self._IsCancelled():
            
            break
            
        
        with self._lock:
            
            self._status_text = status_text + ' - retrying in {}'.format( ClientData.TimestampToPrettyTimeDelta( self._serverside_bandwidth_wake_time ) )
            
        
        time.sleep( 1 )
2018-04-18 22:10:15 +00:00
def AddAdditionalHeader( self, key, value ):
with self._lock:
self._additional_headers[ key ] = value
def BandwidthOK( self ):
with self._lock:
if self._ObeysBandwidth():
2020-06-17 21:31:54 +00:00
return self.engine.bandwidth_manager.CanDoWork( self._network_contexts )
2018-04-18 22:10:15 +00:00
else:
return True
2018-10-31 21:41:14 +00:00
def Cancel( self, status_text = None ):
2018-04-18 22:10:15 +00:00
with self._lock:
2018-10-31 21:41:14 +00:00
if status_text is None:
status_text = 'cancelled!'
self._status_text = status_text
2018-04-18 22:10:15 +00:00
self._SetCancelled()
def CanValidateInPopup( self ):
with self._lock:
return self.engine.domain_manager.CanValidateInPopup( self._network_contexts )
def CheckCanLogin( self ):
with self._lock:
if self._for_login:
2018-10-24 21:34:02 +00:00
raise HydrusExceptions.ValidationException( 'Login jobs should not be asked if they can login!' )
2018-04-18 22:10:15 +00:00
else:
return self.engine.login_manager.CheckCanLogin( self._login_network_context )
2019-08-07 22:59:53 +00:00
def CurrentlyWaitingOnConnectionError( self ):
    """Return True while the connection-error wake time has not yet passed."""
    
    with self._lock:
        
        wake_time_has_passed = HydrusData.TimeHasPassed( self._connection_error_wake_time )
        
        return not wake_time_has_passed
2019-11-28 01:11:46 +00:00
def CurrentlyWaitingOnServersideBandwidth( self ):
    """Return True while the serverside-bandwidth wake time has not yet passed."""
    
    with self._lock:
        
        wake_time_has_passed = HydrusData.TimeHasPassed( self._serverside_bandwidth_wake_time )
        
        return not wake_time_has_passed
2020-04-16 00:09:42 +00:00
def DomainOK( self ):
with self._lock:
if self._max_connection_attempts_allowed == 1:
return True
domain_ok = self.engine.domain_manager.DomainOK( self._url )
if not domain_ok:
self._status_text = 'This domain has had several serious errors recently. Waiting a bit.'
self._Sleep( 10 )
return domain_ok
2018-04-18 22:10:15 +00:00
def GenerateLoginProcess( self ):
with self._lock:
if self._for_login:
raise Exception( 'Login jobs should not be asked to generate login processes!' )
else:
return self.engine.login_manager.GenerateLoginProcess( self._login_network_context )
def GenerateValidationPopupProcess( self ):
with self._lock:
return self.engine.domain_manager.GenerateValidationPopupProcess( self._network_contexts )
2019-01-09 22:59:03 +00:00
def GetContentBytes( self ):
2018-04-18 22:10:15 +00:00
with self._lock:
self._stream_io.seek( 0 )
return self._stream_io.read()
2019-01-09 22:59:03 +00:00
def GetContentText( self ):
    """Return the response body decoded to text.

    Decoding starts from the job's current encoding; the encoding reported back
    by the decoder is stored on the job for next time.
    """
    
    decode_result = HydrusText.NonFailingUnicodeDecode( self.GetContentBytes(), self._encoding )
    
    ( text, self._encoding ) = decode_result
    
    return text
2018-04-18 22:10:15 +00:00
def GetContentType( self ):
with self._lock:
return self._content_type
def GetCreationTime( self ):
with self._lock:
return self._creation_time
2018-08-22 21:10:59 +00:00
def GetDomain( self ):
with self._lock:
return self._domain
2018-04-18 22:10:15 +00:00
def GetErrorException( self ):
with self._lock:
return self._error_exception
def GetErrorText( self ):
with self._lock:
return self._error_text
2018-11-28 22:31:04 +00:00
def GetLoginNetworkContext( self ):
with self._lock:
return self._login_network_context
2018-04-18 22:10:15 +00:00
def GetNetworkContexts( self ):
with self._lock:
return list( self._network_contexts )
2018-08-22 21:10:59 +00:00
def GetSecondLevelDomain( self ):
with self._lock:
return self._second_level_domain
2018-10-17 21:00:09 +00:00
def GetSession( self ):
with self._lock:
snc = self._session_network_context
session = self.engine.session_manager.GetSession( snc )
return session
2018-04-18 22:10:15 +00:00
def GetStatus( self ):
    """Return ( status_text, data usage from the tracker over a window of 1, bytes read, bytes to read )."""
    
    with self._lock:
        
        current_usage = self._bandwidth_tracker.GetUsage( HC.BANDWIDTH_TYPE_DATA, 1 )
        
        status = ( self._status_text, current_usage, self._num_bytes_read, self._num_bytes_to_read )
        
    
    return status
def GetTotalDataUsed( self ):
    """Return the tracker's data usage queried with a time window of None."""
    
    with self._lock:
        
        total_usage = self._bandwidth_tracker.GetUsage( HC.BANDWIDTH_TYPE_DATA, None )
        
    
    return total_usage
def GetURL( self ):
with self._lock:
return self._url
def HasError( self ):
with self._lock:
return self._error_exception is not None
def IsAsleep( self ):
    """Return True while the job's wake time has not yet passed."""
    
    with self._lock:
        
        wake_time_has_passed = HydrusData.TimeHasPassed( self._wake_time )
        
        return not wake_time_has_passed
def IsCancelled( self ):
with self._lock:
return self._IsCancelled()
def IsDone( self ):
with self._lock:
return self._IsDone()
2018-11-28 22:31:04 +00:00
def IsHydrusJob( self ):
with self._lock:
return False
2018-04-18 22:10:15 +00:00
def IsValid( self ):
with self._lock:
return self.engine.domain_manager.IsValid( self._network_contexts )
def NeedsLogin( self ):
with self._lock:
if self._for_login:
return False
else:
return self.engine.login_manager.NeedsLogin( self._login_network_context )
def NoEngineYet( self ):
return self.engine is None
def ObeysBandwidth( self ):
return self._ObeysBandwidth()
2019-07-03 22:49:27 +00:00
def OnlyTryConnectionOnce( self ):
self._max_connection_attempts_allowed = 1
2018-04-18 22:10:15 +00:00
def OverrideBandwidth( self, delay = None ):
with self._lock:
if delay is None:
self._bandwidth_manual_override = True
self._wake_time = 0
else:
self._bandwidth_manual_override_delayed_timestamp = HydrusData.GetNow() + delay
self._wake_time = min( self._wake_time, self._bandwidth_manual_override_delayed_timestamp + 1 )
2019-08-07 22:59:53 +00:00
def OverrideConnectionErrorWait( self ):
with self._lock:
self._connection_error_wake_time = 0
2019-11-28 01:11:46 +00:00
def OverrideServersideBandwidthWait( self ):
with self._lock:
self._serverside_bandwidth_wake_time = 0
2018-09-19 21:54:51 +00:00
def OverrideToken( self ):
with self._lock:
self._gallery_token_consumed = True
self._wake_time = 0
2018-04-18 22:10:15 +00:00
def SetError( self, e, error ):
    """Locked wrapper around _SetError: store the exception and error text and mark the job done."""
    
    lock = self._lock
    
    with lock:
        
        self._SetError( e, error )
def SetFiles( self, files ):
with self._lock:
self._files = files
2018-06-06 21:27:02 +00:00
def SetFileImportOptions( self, file_import_options ):
with self._lock:
self._file_import_options = file_import_options
2018-04-18 22:10:15 +00:00
def SetForLogin( self, for_login ):
with self._lock:
self._for_login = for_login
2018-08-22 21:10:59 +00:00
def SetGalleryToken( self, token_name ):
with self._lock:
self._gallery_token_name = token_name
2018-04-18 22:10:15 +00:00
def SetStatus( self, text ):
with self._lock:
self._status_text = text
def Sleep( self, seconds ):
    """Locked wrapper around _Sleep: push the wake time the given number of seconds into the future."""
    
    lock = self._lock
    
    with lock:
        
        self._Sleep( seconds )
def Start( self ):
    """Run the job to completion: send the request, read the response, and retry on recoverable failures.

    On success the body ends up in the in-memory buffer, or in the temp path
    file when one was given. A non-OK response is read and turned into an
    error stored on the job, unless it maps to a bandwidth or should-reattempt
    exception, which triggers a wait and another attempt. Any exception that
    escapes the retry loop is recorded on the job rather than propagated. The
    job is always marked done on exit.
    """
    
    try:
        
        with self._lock:
            
            self._is_started = True
            
            self._status_text = 'job started'
            
        
        request_completed = False
        
        num_attempts_started = 0
        
        while not request_completed:
            
            if self._IsCancelled():
                
                return
                
            
            if num_attempts_started > 0:
                
                # A previous attempt may have partially filled the buffer and advanced the byte
                # counters before failing. Start every reattempt from a clean slate, otherwise the
                # final content (or error body) is the failed attempt's bytes plus the new ones, and
                # a response with no Content-Type would inherit the previous attempt's.
                with self._lock:
                    
                    self._stream_io = io.BytesIO()
                    
                    self._content_type = None
                    
                    self._num_bytes_read = 0
                    self._num_bytes_to_read = 1
                    
                
            
            num_attempts_started += 1
            
            response = None
            
            try:
                
                response = self._SendRequestAndGetResponse()
                
                with self._lock:
                    
                    if self._body is not None:
                        
                        # the uploaded body counts towards bandwidth too
                        self._ReportDataUsed( len( self._body ) )
                        
                    
                
                if 'Content-Type' in response.headers:
                    
                    self._content_type = response.headers[ 'Content-Type' ]
                    
                
                if response.ok:
                    
                    with self._lock:
                        
                        self._status_text = 'downloading\u2026'
                        
                    
                    if response.encoding is not None:
                        
                        encoding = response.encoding
                        
                        # we'll default to utf-8 rather than ISO-8859-1
                        we_got_lame_iso_default_from_requests = encoding == 'ISO-8859-1' and ( self._content_type is None or encoding not in self._content_type )
                        
                        if not we_got_lame_iso_default_from_requests:
                            
                            self._encoding = encoding
                            
                        
                    
                    if self._temp_path is None:
                        
                        # in-memory reads are capped at 100 MiB
                        self._ReadResponse( response, self._stream_io, 104857600 )
                        
                    else:
                        
                        with open( self._temp_path, 'wb' ) as f:
                            
                            self._ReadResponse( response, f )
                            
                        
                    
                    with self._lock:
                        
                        self._status_text = 'done!'
                        
                    
                else:
                    
                    with self._lock:
                        
                        self._status_text = str( response.status_code ) + ' - ' + str( response.reason )
                        
                    
                    # it is important we do this before ReadResponse, as the CF test needs r.text, which is nullified if we first access with iter_content
                    if not self._we_tried_cloudflare_once:
                        
                        self._we_tried_cloudflare_once = True
                        
                        self._SolveCloudFlare( response )
                        
                    
                    self._ReadResponse( response, self._stream_io, 104857600 )
                    
                    with self._lock:
                        
                        self._stream_io.seek( 0 )
                        
                        data = self._stream_io.read()
                        
                        ( e, error_text ) = ConvertStatusCodeAndDataIntoExceptionInfo( response.status_code, data, self.IS_HYDRUS_SERVICE )
                        
                        if isinstance( e, ( HydrusExceptions.BandwidthException, HydrusExceptions.ShouldReattemptNetworkException ) ):
                            
                            # recoverable: hand it to the retry handlers below
                            raise e
                            
                        
                        self._SetError( e, error_text )
                        
                    
                
                request_completed = True
                
            except HydrusExceptions.BandwidthException as e:
                
                self._current_connection_attempt_number += 1
                
                if self._CanReattemptRequest():
                    
                    self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
                    
                else:
                    
                    raise HydrusExceptions.BandwidthException( 'Server reported very limited bandwidth: ' + str( e ) )
                    
                
                self._WaitOnServersideBandwidth( 'server reported limited bandwidth' )
                
            except HydrusExceptions.ShouldReattemptNetworkException as e:
                
                self._current_connection_attempt_number += 1
                
                if not self._CanReattemptRequest():
                    
                    raise HydrusExceptions.NetworkInfrastructureException( 'Ran out of reattempts on this error: ' + str( e ) )
                    
                
                self._WaitOnConnectionError( str( e ) )
                
            except requests.exceptions.ChunkedEncodingError:
                
                self._current_connection_attempt_number += 1
                
                if not self._CanReattemptRequest():
                    
                    raise HydrusExceptions.StreamTimeoutException( 'Unable to complete request--it broke mid-way!' )
                    
                
                self._WaitOnConnectionError( 'connection broke mid-request' )
                
            except ( requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout ):
                
                self._current_connection_attempt_number += 1
                
                if self._CanReattemptConnection():
                    
                    self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
                    
                else:
                    
                    raise HydrusExceptions.ConnectionException( 'Could not connect!' )
                    
                
                self._WaitOnConnectionError( 'connection failed' )
                
            except requests.exceptions.ReadTimeout:
                
                self._current_connection_attempt_number += 1
                
                if not self._CanReattemptRequest():
                    
                    raise HydrusExceptions.StreamTimeoutException( 'Connection successful, but reading response timed out!' )
                    
                
                self._WaitOnConnectionError( 'read timed out' )
                
            finally:
                
                if response is not None:
                    
                    # if full data was not read, the response will hang around in connection pool longer than we want
                    # so just an explicit close here
                    response.close()
                    
                
            
        
    except Exception as e:
        
        with self._lock:
            
            trace = traceback.format_exc()
            
            # these exception types are not printed to the log; everything else is
            if not isinstance( e, ( HydrusExceptions.NetworkInfrastructureException, HydrusExceptions.StreamTimeoutException, HydrusExceptions.FileSizeException ) ):
                
                HydrusData.Print( trace )
                
            
            if isinstance( e, HydrusExceptions.NetworkInfrastructureException ):
                
                self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
                
            
            self._status_text = 'Error: ' + str( e )
            
            self._SetError( e, trace )
            
        
    finally:
        
        with self._lock:
            
            self._SetDone()
2018-08-22 21:10:59 +00:00
def TokensOK( self ):
with self._lock:
2018-09-12 21:36:26 +00:00
need_token = self._gallery_token_name is not None and not self._gallery_token_consumed
sld = self._second_level_domain
gtn = self._gallery_token_name
if need_token:
2018-10-31 21:41:14 +00:00
( consumed, next_timestamp ) = self.engine.bandwidth_manager.TryToConsumeAGalleryToken( sld, gtn )
2018-09-12 21:36:26 +00:00
with self._lock:
2018-08-22 21:10:59 +00:00
if consumed:
2018-09-12 21:36:26 +00:00
self._status_text = 'slot consumed, starting soon'
2018-08-22 21:10:59 +00:00
self._gallery_token_consumed = True
else:
self._status_text = 'waiting for a ' + self._gallery_token_name + ' slot: next ' + ClientData.TimestampToPrettyTimeDelta( next_timestamp, just_now_threshold = 1 )
2018-08-22 21:10:59 +00:00
self._Sleep( 1 )
return False
2018-09-12 21:36:26 +00:00
return True
2018-08-22 21:10:59 +00:00
2020-06-17 21:31:54 +00:00
def TryToStartBandwidth( self ):
    """Ask the bandwidth manager whether this job's request may start now.
    
    If the job does not obey bandwidth rules, the request is reported as used
    to both the job's tracker and the bandwidth manager and True is returned.
    Otherwise the result of TryToStartRequest is returned; when that result is
    falsey, the status text is set to a waiting message and a sleep is
    requested that scales with the expected wait.
    """
    
    with self._lock:
        
        if not self._ObeysBandwidth():
            
            self._bandwidth_tracker.ReportRequestUsed()
            
            self.engine.bandwidth_manager.ReportRequestUsed( self._network_contexts )
            
            return True
            
        
        can_start = self.engine.bandwidth_manager.TryToStartRequest( self._network_contexts )
        
        if can_start:
            
            self._bandwidth_tracker.ReportRequestUsed()
            
            return can_start
            
        
        ( bandwidth_waiting_duration, bandwidth_network_context ) = self.engine.bandwidth_manager.GetWaitingEstimateAndContext( self._network_contexts )
        
        override_timestamp = self._bandwidth_manual_override_delayed_timestamp
        
        override_coming_first = False
        
        if override_timestamp is not None:
            
            # a delayed manual override is pending; see whether it lands before bandwidth frees up
            override_waiting_duration = override_timestamp - HydrusData.GetNow()
            
            override_coming_first = override_waiting_duration < bandwidth_waiting_duration
            
        
        just_now_threshold = 2
        
        if override_coming_first:
            
            waiting_duration = override_waiting_duration
            
            waiting_str = 'overriding bandwidth ' + ClientData.TimestampToPrettyTimeDelta( override_timestamp, just_now_string = 'imminently', just_now_threshold = just_now_threshold )
            
        else:
            
            waiting_duration = bandwidth_waiting_duration
            
            waiting_str = 'bandwidth free ' + ClientData.TimestampToPrettyTimeDelta( HydrusData.GetNow() + waiting_duration, just_now_string = 'imminently', just_now_threshold = just_now_threshold )
            
        
        self._status_text = waiting_str + '\u2026 (' + bandwidth_network_context.ToHumanString() + ')'
        
        # the longer the expected wait, the longer the requested sleep; waits of ten seconds or less request none
        for ( threshold, sleep_duration ) in ( ( 1200, 30 ), ( 120, 10 ), ( 10, 1 ) ):
            
            if waiting_duration > threshold:
                
                self._Sleep( sleep_duration )
                
                break
                
            
        
        return can_start
        
    
2018-04-18 22:10:15 +00:00
def WaitUntilDone( self ):
    """Block until IsDone() is true, then raise if the job did not end cleanly.
    
    Raises:
        HydrusExceptions.ShutdownException: the model or the calling thread is shutting down.
        Exception: the stored error itself, or a generic Exception if the stored error is not an Exception instance.
        HydrusExceptions.CancelledException: the job was cancelled.
    """
    
    finished = False
    
    while not finished:
        
        # wait on the done event for up to five seconds, then re-check
        self._is_done_event.wait( 5 )
        
        finished = self.IsDone()
        
    
    with self._lock:
        
        if HG.model_shutdown or HydrusThreading.IsThreadShuttingDown():
            
            raise HydrusExceptions.ShutdownException()
            
        
        stored_error = self._error_exception
        
        if stored_error is not None:
            
            if not isinstance( stored_error, Exception ):
                
                raise Exception( 'Problem in network error handling.' )
                
            
            raise stored_error
            
        
        if self._IsCancelled():
            
            prefix = 'Upload cancelled: ' if self._method == 'POST' else 'Download cancelled: '
            
            raise HydrusExceptions.CancelledException( prefix + self._status_text )
            
        
    
2018-10-31 21:41:14 +00:00
def WillingToWaitOnInvalidLogin( self ):
    """Return the WILLING_TO_WAIT_ON_INVALID_LOGIN flag visible on this job."""
    
    willing = self.WILLING_TO_WAIT_ON_INVALID_LOGIN
    
    return willing
    
2018-04-18 22:10:15 +00:00
class NetworkJobDownloader( NetworkJob ):
    """Network job that also carries a downloader-page network context."""
    
    def __init__( self, downloader_page_key, method, url, body = None, referral_url = None, temp_path = None ):
        
        # stored before the base constructor runs
        # NOTE(review): presumably because the base constructor builds the network contexts -- confirm
        self._downloader_page_key = downloader_page_key
        
        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )
        
    
    def _GenerateNetworkContexts( self ):
        """Return the base job's contexts with this downloader page's context appended."""
        
        contexts = NetworkJob._GenerateNetworkContexts( self )
        
        page_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOWNLOADER_PAGE, self._downloader_page_key )
        
        contexts.append( page_context )
        
        return contexts
        
    
class NetworkJobSubscription( NetworkJob ):
    """Network job that also carries a subscription network context."""
    
    # reported by WillingToWaitOnInvalidLogin
    WILLING_TO_WAIT_ON_INVALID_LOGIN = False
    
    def __init__( self, subscription_key, method, url, body = None, referral_url = None, temp_path = None ):
        
        # stored before the base constructor runs
        # NOTE(review): presumably because the base constructor builds the network contexts -- confirm
        self._subscription_key = subscription_key
        
        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )
        
    
    def _GenerateNetworkContexts( self ):
        """Return the base job's contexts with this subscription's context appended."""
        
        contexts = NetworkJob._GenerateNetworkContexts( self )
        
        subscription_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_SUBSCRIPTION, self._subscription_key )
        
        contexts.append( subscription_context )
        
        return contexts
        
    
class NetworkJobHydrus( NetworkJob ):
    """Network job addressed to a hydrus service, identified by its service key.
    
    It uses only the global and hydrus-service network contexts, reports
    request and data usage to the service's account for restricted services,
    and checks the server's advertised network version on every response from
    a restricted service.
    """
    
    # reported by WillingToWaitOnInvalidLogin
    WILLING_TO_WAIT_ON_INVALID_LOGIN = False
    
    IS_HYDRUS_SERVICE = True
    
    def __init__( self, service_key, method, url, body = None, referral_url = None, temp_path = None ):
        
        # stored before the base constructor runs
        # NOTE(review): presumably because the base constructor builds the network contexts -- confirm
        self._service_key = service_key
        
        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )
        
    
    def _CheckHydrusVersion( self, service_type, response ):
        """Validate the response's 'server' header against the expected service and network version.
        
        The header is expected to look like '<service string>/<integer network version>'.
        
        Raises:
            HydrusExceptions.WrongServiceTypeException: the header is missing, does not
                contain the expected service string, or is not in the expected form.
            HydrusExceptions.NetworkVersionException: the server's network version
                differs from HC.NETWORK_VERSION.
        """
        
        service_string = HC.service_string_lookup[ service_type ]
        
        headers = response.headers
        
        if 'server' not in headers or service_string not in headers[ 'server' ]:
            
            raise HydrusExceptions.WrongServiceTypeException( 'Target was not a ' + service_string + '!' )
            
        
        server_header = headers[ 'server' ]
        
        malformed_message = 'Target was not a ' + service_string + '! Its server header was malformed: ' + repr( server_header )
        
        header_parts = server_header.split( '/' )
        
        # a header with no slash or several slashes would otherwise fail tuple unpacking with a bare ValueError
        if len( header_parts ) != 2:
            
            raise HydrusExceptions.WrongServiceTypeException( malformed_message )
            
        
        try:
            
            network_version = int( header_parts[1] )
            
        except ValueError:
            
            raise HydrusExceptions.WrongServiceTypeException( malformed_message )
            
        
        if network_version != HC.NETWORK_VERSION:
            
            if network_version > HC.NETWORK_VERSION:
                
                message = 'Your client is out of date; please download the latest release.'
                
            else:
                
                message = 'The server is out of date; please ask its admin to update to the latest release.'
                
            
            raise HydrusExceptions.NetworkVersionException( 'Network version mismatch! The server\'s network version was ' + str( network_version ) + ', whereas your client\'s is ' + str( HC.NETWORK_VERSION ) + '! ' + message )
            
        
    
    def _GenerateNetworkContexts( self ):
        """Return the global context followed by this service's hydrus context.
        
        The base class implementation is not called here.
        """
        
        network_contexts = []
        
        network_contexts.append( ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT )
        network_contexts.append( ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_HYDRUS, self._service_key ) )
        
        return network_contexts
        
    
    def _GenerateSpecificNetworkContexts( self ):
        """Return ( session context, login context ); both are the same hydrus context object."""
        
        # we store cookies on and login to the same hydrus-specific context
        
        session_network_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_HYDRUS, self._service_key )
        login_network_context = session_network_context
        
        return ( session_network_context, login_network_context )
        
    
    def _ReportDataUsed( self, num_bytes ):
        """Report num_bytes to the service's account (restricted services only), then to the base job."""
        
        service = self.engine.controller.services_manager.GetService( self._service_key )
        
        service_type = service.GetServiceType()
        
        if service_type in HC.RESTRICTED_SERVICES:
            
            account = service.GetAccount()
            
            account.ReportDataUsed( num_bytes )
            
        
        NetworkJob._ReportDataUsed( self, num_bytes )
        
    
    def _SendRequestAndGetResponse( self ):
        """Send the request via the base job, with account reporting and a version check for restricted services."""
        
        service = self.engine.controller.services_manager.GetService( self._service_key )
        
        service_type = service.GetServiceType()
        
        if service_type in HC.RESTRICTED_SERVICES:
            
            # the request is counted against the account before it is sent
            account = service.GetAccount()
            
            account.ReportRequestUsed()
            
        
        response = NetworkJob._SendRequestAndGetResponse( self )
        
        if service_type in HC.RESTRICTED_SERVICES:
            
            self._CheckHydrusVersion( service_type, response )
            
        
        return response
        
    
    def IsHydrusJob( self ):
        """Return True; the job lock is taken while answering."""
        
        with self._lock:
            
            return True
            
        
    
2020-05-13 19:03:16 +00:00
class NetworkJobIPFS( NetworkJob ):
    """Network job that tries its connection only once and overrides bandwidth rules."""
    
    # NOTE(review): assumed to be a class-level flag, like IS_HYDRUS_SERVICE on NetworkJobHydrus -- confirm
    IS_IPFS_SERVICE = True
    
    def __init__( self, method, url, body = None, referral_url = None, temp_path = None ):
        
        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )
        
        # both options are set once the base job is constructed
        self.OnlyTryConnectionOnce()
        self.OverrideBandwidth()
        
    
2018-04-18 22:10:15 +00:00
class NetworkJobWatcherPage( NetworkJob ):
    """Network job that also carries a watcher-page network context."""
    
    def __init__( self, watcher_key, method, url, body = None, referral_url = None, temp_path = None ):
        
        # stored before the base constructor runs
        # NOTE(review): presumably because the base constructor builds the network contexts -- confirm
        self._watcher_key = watcher_key
        
        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )
        
    
    def _GenerateNetworkContexts( self ):
        """Return the base job's contexts with this watcher's context appended."""
        
        contexts = NetworkJob._GenerateNetworkContexts( self )
        
        watcher_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_WATCHER_PAGE, self._watcher_key )
        
        contexts.append( watcher_context )
        
        return contexts
        
    