hydrus/hydrus/client/networking/ClientNetworkingJobs.py

2137 lines
69 KiB
Python
Raw Normal View History

import calendar
2019-01-09 22:59:03 +00:00
import io
2020-05-20 21:36:02 +00:00
import os
import typing
2020-05-20 21:36:02 +00:00
import requests
import threading
import traceback
import time
import urllib
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusThreading
from hydrus.core import HydrusText
2021-04-07 21:26:45 +00:00
from hydrus.core.networking import HydrusNetworking
2018-04-18 22:10:15 +00:00
2020-07-29 20:52:44 +00:00
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientData
2022-06-22 20:43:12 +00:00
from hydrus.client import ClientTime
2020-07-29 20:52:44 +00:00
from hydrus.client.networking import ClientNetworkingContexts
from hydrus.client.networking import ClientNetworkingFunctions
2020-07-29 20:52:44 +00:00
2020-04-16 00:09:42 +00:00
try:
import cloudscraper
CLOUDSCRAPER_OK = True
2020-05-13 19:03:16 +00:00
try:
# help pyinstaller
import pyparsing
PYPARSING_OK = True
except:
PYPARSING_OK = False
2020-04-16 00:09:42 +00:00
except:
CLOUDSCRAPER_OK = False
2020-05-13 19:03:16 +00:00
PYPARSING_OK = False
2020-04-16 00:09:42 +00:00
2018-04-18 22:10:15 +00:00
def ConvertStatusCodeAndDataIntoExceptionInfo( status_code, data, is_hydrus_service = False ):
    """
    Turn a failed HTTP response into a hydrus exception.
    
    status_code is the numeric HTTP status, data is the raw response body, and
    is_hydrus_service picks which exception a 503 becomes.
    
    Returns ( exception_instance, error_text ). error_text is the decoded body, or a
    short notice plus the first 256 characters when the body is long and worth logging.
    """
    
    ( error_text, encoding ) = HydrusText.NonFailingUnicodeDecode( data, 'utf-8' )
    
    # statuses that map straight onto one exception class
    simple_lookup = {
        304 : HydrusExceptions.NotModifiedException,
        400 : HydrusExceptions.BadRequestException,
        401 : HydrusExceptions.MissingCredentialsException,
        403 : HydrusExceptions.InsufficientCredentialsException,
        404 : HydrusExceptions.NotFoundException,
        406 : HydrusExceptions.NotAcceptable,
        409 : HydrusExceptions.ConflictException,
        416 : HydrusExceptions.RangeNotSatisfiableException,
        419 : HydrusExceptions.SessionException,
        426 : HydrusExceptions.NetworkVersionException,
        429 : HydrusExceptions.BandwidthException,
        509 : HydrusExceptions.BandwidthException,
        502 : HydrusExceptions.ShouldReattemptNetworkException
    }
    
    if status_code in simple_lookup:
        
        eclass = simple_lookup[ status_code ]
        
    elif status_code == 503:
        
        eclass = HydrusExceptions.ServerBusyException if is_hydrus_service else HydrusExceptions.ShouldReattemptNetworkException
        
    elif status_code >= 500:
        
        eclass = HydrusExceptions.ServerException
        
    else:
        
        eclass = HydrusExceptions.NetworkException
        
    
    # long bodies for 304 and 404 are never dumped to the log
    body_may_be_logged = status_code not in ( 304, 404 )
    
    if body_may_be_logged and len( error_text ) > 1024:
        
        log_excerpt = error_text[ : 512 * 1024 ]
        display_excerpt = log_excerpt[:256]
        
        HydrusData.DebugPrint( log_excerpt )
        
        notice = 'The server\'s error text was too long to display. The first part follows, while a larger chunk has been written to the log.'
        
        error_text = os.linesep.join( ( notice, display_excerpt ) )
        
    
    e = eclass( '{}: {}'.format( status_code, error_text ) )
    
    return ( e, error_text )
class NetworkJob( object ):
2018-10-31 21:41:14 +00:00
WILLING_TO_WAIT_ON_INVALID_LOGIN = True
2018-04-18 22:10:15 +00:00
IS_HYDRUS_SERVICE = False
2020-05-13 19:03:16 +00:00
IS_IPFS_SERVICE = False
2018-04-18 22:10:15 +00:00
def __init__( self, method: str, url: str, body = None, referral_url = None, temp_path = None ):
    """
    Set up a job for one request.
    
    method and url describe the request. body is the optional request body; a str body is
    encoded to utf-8 bytes. referral_url is an optional referrer. temp_path, when given,
    removes the in-memory size cap on the response.
    """
    
    if isinstance( body, str ):
        
        body = body.encode( 'utf-8' )
        
    
    # the engine is attached later; NoEngineYet reports on this
    self.engine = None
    
    self._lock = threading.Lock()
    
    # what we are asking for
    self._method = method
    self._url = url
    
    # retry bookkeeping
    self._current_connection_attempt_number = 1
    self._current_request_attempt_number = 1
    self._this_is_a_one_shot_request = False
    self._we_tried_cloudflare_once = False
    
    self._domain = ClientNetworkingFunctions.ConvertURLIntoDomain( self._url )
    self._second_level_domain = ClientNetworkingFunctions.ConvertURLIntoSecondLevelDomain( self._url )
    
    self._body = body
    self._referral_url = referral_url
    self._actual_fetched_url = self._url
    self._temp_path = temp_path
    
    self._response_server_header = None
    self._response_last_modified = None
    
    # with no temp path the response is capped at 100MB
    self._max_allowed_bytes = 100 * 1024 * 1024 if self._temp_path is None else None
    
    self._files = None
    self._for_login = False
    self._additional_headers = {}
    
    self._creation_time = HydrusData.GetNow()
    
    self._bandwidth_tracker = HydrusNetworking.BandwidthTracker()
    
    # wake times for the various waiting states
    self._connection_error_wake_time = 0
    self._serverside_bandwidth_wake_time = 0
    self._wake_time_float = 0.0
    
    # response metadata and content
    self._content_type = None
    self._response_mime = None
    self._encoding = 'utf-8'
    self._encoding_confirmed = False
    self._stream_io = io.BytesIO()
    
    # error and completion state
    self._error_exception = Exception( 'Exception not initialised.' ) # PyLint hint, wew
    self._error_exception = None
    self._error_text = None
    
    self._is_done_event = threading.Event()
    
    self._is_started = False
    self._is_done = False
    self._is_cancelled = False
    
    # gallery token state
    self._gallery_token_name = None
    self._gallery_token_consumed = False
    self._last_gallery_token_estimate = 0
    
    # bandwidth override state
    self._bandwidth_manual_override = False
    self._bandwidth_manual_override_delayed_timestamp = None
    self._last_bandwidth_time_estimate = 0
    self._last_time_ongoing_bandwidth_failed = 0
    
    # progress reporting
    self._status_text = 'initialising\u2026'
    self._num_bytes_read = 0
    self._num_bytes_to_read = 1
    self._num_bytes_read_is_accurate = True
    self._number_of_concurrent_empty_chunks = 0
    
    self._file_import_options = None
    
    # these read the domain attributes set above, so they stay last
    self._network_contexts = self._GenerateNetworkContexts()
    
    specific_contexts = self._GenerateSpecificNetworkContexts()
    
    ( self._session_network_context, self._login_network_context ) = specific_contexts
2018-05-09 20:23:00 +00:00
def _CanReattemptConnection( self ):
2022-06-22 20:43:12 +00:00
if self._this_is_a_one_shot_request:
return False
max_connection_attempts_allowed = HG.client_controller.new_options.GetInteger( 'max_connection_attempts_allowed' )
return self._current_connection_attempt_number <= max_connection_attempts_allowed
2018-05-09 20:23:00 +00:00
2018-04-18 22:10:15 +00:00
def _CanReattemptRequest( self ):
2022-06-22 20:43:12 +00:00
if self._this_is_a_one_shot_request:
return False
2018-04-18 22:10:15 +00:00
if self._method == 'GET':
2022-06-22 20:43:12 +00:00
max_attempts_allowed = HG.client_controller.new_options.GetInteger( 'max_request_attempts_allowed_get' )
2018-04-18 22:10:15 +00:00
2022-06-22 20:43:12 +00:00
else:
2018-04-18 22:10:15 +00:00
max_attempts_allowed = 1
2022-06-22 20:43:12 +00:00
return self._current_request_attempt_number <= max_attempts_allowed
2018-04-18 22:10:15 +00:00
def _GenerateModifiedDate( self, response: requests.Response ):
if 'Last-Modified' in response.headers:
# Thu, 20 May 2010 07:00:23 GMT
# these are always in GMT
last_modified_string = response.headers[ 'Last-Modified' ]
if last_modified_string.endswith( ' GMT' ):
last_modified_string = last_modified_string[:-4]
try:
struct_time = time.strptime( last_modified_string, '%a, %d %b %Y %H:%M:%S' )
# the given struct is in GMT, so calendar.timegm is appropriate here
2022-06-22 20:43:12 +00:00
last_modified_time = int( calendar.timegm( struct_time ) )
if ClientTime.TimestampIsSensible( last_modified_time ):
self._response_last_modified = last_modified_time
except:
pass
2018-04-18 22:10:15 +00:00
def _GenerateNetworkContexts( self ):
    """Return the global context followed by one domain context per applicable domain of this job's domain."""
    
    applicable_domains = ClientNetworkingFunctions.ConvertDomainIntoAllApplicableDomains( self._domain )
    
    domain_contexts = [ ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, applicable_domain ) for applicable_domain in applicable_domains ]
    
    return [ ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT ] + domain_contexts
def _GenerateSpecificNetworkContexts( self ):
    """Return ( session_network_context, login_network_context ) for this job."""
    
    # we always store cookies in the larger session (even if the cookie itself refers to a subdomain in the session object)
    # but we can login to a specific subdomain
    
    return (
        ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, self._second_level_domain ),
        ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, self._domain )
    )
2020-08-27 01:00:42 +00:00
def _GetTimeouts( self ):
    """Return ( connect_timeout, read_timeout ); the read timeout is six times the 'network_timeout' option."""
    
    base_timeout = HG.client_controller.new_options.GetInteger( 'network_timeout' )
    
    return ( base_timeout, base_timeout * 6 )
2018-04-18 22:10:15 +00:00
def _IsCancelled( self ):
if self._is_cancelled:
return True
2022-01-19 21:28:59 +00:00
if HG.started_shutdown:
2018-04-18 22:10:15 +00:00
return True
return False
def _IsDone( self ):
if self._is_done:
return True
2022-01-19 21:28:59 +00:00
if HG.started_shutdown or HydrusThreading.IsThreadShuttingDown():
2018-04-18 22:10:15 +00:00
return True
return False
def _ObeysBandwidth( self ):
if self._bandwidth_manual_override:
return False
if self._bandwidth_manual_override_delayed_timestamp is not None and HydrusData.TimeHasPassed( self._bandwidth_manual_override_delayed_timestamp ):
return False
if self._method == 'POST':
return False
if self._for_login:
return False
return True
def _OngoingBandwidthOK( self ):
    """Return whether the bandwidth manager allows this download to continue, checking at most once per failed second."""
    
    current_second = HydrusData.GetNow()
    
    if current_second == self._last_time_ongoing_bandwidth_failed:
        
        # it won't have changed, so no point spending any cpu checking
        return False
        
    
    can_continue = self.engine.bandwidth_manager.CanContinueDownload( self._network_contexts )
    
    if not can_continue:
        
        self._last_time_ongoing_bandwidth_failed = current_second
        
    
    return can_continue
def _ParseFirstResponseHeaders( self, response: requests.Response ):
2018-04-18 22:10:15 +00:00
with self._lock:
if 'Content-Type' in response.headers:
self._content_type = response.headers[ 'Content-Type' ]
2018-06-06 21:27:02 +00:00
if self._content_type is not None and self._content_type in HC.mime_enum_lookup:
self._response_mime = HC.mime_enum_lookup[ self._content_type ]
2018-06-06 21:27:02 +00:00
else:
self._response_mime = None
2018-06-06 21:27:02 +00:00
2018-04-18 22:10:15 +00:00
if 'content-length' in response.headers:
2019-07-03 22:49:27 +00:00
2018-04-18 22:10:15 +00:00
self._num_bytes_to_read = int( response.headers[ 'content-length' ] )
else:
2018-06-06 21:27:02 +00:00
self._num_bytes_to_read = None
if response.encoding is not None:
self._encoding = response.encoding
if response.ok: # i.e. we got what we expected, not some error
if 'content-length' in response.headers:
2018-06-06 21:27:02 +00:00
if self._max_allowed_bytes is not None and self._num_bytes_to_read > self._max_allowed_bytes:
raise HydrusExceptions.NetworkException( 'The url was apparently {} but the max network size for this type of job is {}!'.format( HydrusData.ToHumanBytes( self._num_bytes_to_read ), HydrusData.ToHumanBytes( self._max_allowed_bytes ) ) )
2018-06-06 21:27:02 +00:00
if self._file_import_options is not None:
is_complete_file_size = True
self._file_import_options.CheckNetworkDownload( self._response_mime, self._num_bytes_to_read, is_complete_file_size )
2018-04-18 22:10:15 +00:00
def _ReadResponse( self, response: requests.Response, stream_dest ):
    """
    Stream the body of response into stream_dest, tracking progress and enforcing size limits.
    
    Returns True when the download is known to be incomplete and another request should
    follow, otherwise False.
    
    Raises HydrusExceptions.NetworkException if the Content-Range does not start where we
    left off, if more data arrives than was declared, if the job's max size is exceeded, or
    if too many empty chunks arrive in a row. Raises HydrusExceptions.CancelledException when
    the job is cancelled mid-read and HydrusExceptions.ShutdownException on program shutdown.
    """
    
    if 'content-range' in response.headers:
        
        content_range = response.headers[ 'content-range' ]
        
        # Content-Range: <unit> <range-start>-<range-end>/<size>
        # range and size can be *
        
        if content_range.startswith( 'bytes ' ):
            
            content_range = content_range[6:]
            
            if '/' in content_range:
                
                ( byte_range, size ) = content_range.split( '/', 1 )
                
                if byte_range != '*' and '-' in byte_range:
                    
                    ( byte_start, byte_end ) = byte_range.split( '-', 1 )
                    
                    try:
                        
                        byte_start = int( byte_start )
                        
                    except ValueError:
                        
                        # an unparseable offset cannot be verified, so carry on best-effort
                        byte_start = None
                        
                    
                    # this check used to sit inside the try, where a bare except swallowed the error it raised
                    if byte_start is not None and byte_start != self._num_bytes_read:
                        
                        # this server be crazy
                        # I guess in some cases we might be able to fast forward a < byte_start, but we don't have that raw byte access tech yet
                        # and if byte_start > num_bytes_read, then lmao
                        
                        # the request may not have carried a Range header at all, so look it up safely
                        request_headers = getattr( response.request, 'headers', None ) or {}
                        
                        raise HydrusExceptions.NetworkException( 'This server delivered an undesired Range response! We asked for Range "{}" and got Content-Range "{}" back!'.format( request_headers.get( 'range' ), response.headers[ 'content-range' ] ) )
                        
                    
                
                if size != '*':
                    
                    if self._num_bytes_to_read is None:
                        
                        try:
                            
                            num_bytes = int( size )
                            
                            self._num_bytes_to_read = num_bytes
                            
                        except ValueError:
                            
                            # an unparseable total size is ignored and the length stays unknown
                            pass
                            
                        
                    
                
            
        
    
    starting_num_bytes_read = self._num_bytes_read
    
    for chunk in response.iter_content( chunk_size = 65536 ):
        
        if self._IsCancelled():
            
            raise HydrusExceptions.CancelledException()
            
        
        stream_dest.write( chunk )
        
        # get the raw bytes read, not the length of the chunk, as there may be transfer-encoding (chunked, gzip etc...)
        total_bytes_read_in_this_response = response.raw.tell()
        
        if total_bytes_read_in_this_response == 0:
            
            # this seems to occur when the response is Transfer-Encoding: chunked (note, no Content-Length)
            # there's no great way to track raw bytes read in this case. the iter_content chunk can be unzipped from that
            # nonetheless, requests does raise ChunkedEncodingError if it stops early, so not a huge deal to miss here, just slightly off bandwidth tracking
            
            self._num_bytes_read_is_accurate = False
            
            chunk_num_bytes = len( chunk )
            
            self._num_bytes_read += chunk_num_bytes
            
        else:
            
            previous_num_bytes_read = self._num_bytes_read
            
            self._num_bytes_read = starting_num_bytes_read + total_bytes_read_in_this_response
            
            chunk_num_bytes = self._num_bytes_read - previous_num_bytes_read
            
        
        with self._lock:
            
            if self._num_bytes_to_read is not None and self._num_bytes_read_is_accurate and self._num_bytes_read > self._num_bytes_to_read:
                
                raise HydrusExceptions.NetworkException( 'Too much data: Was expecting {} but server continued responding!'.format( HydrusData.ToHumanBytes( self._num_bytes_to_read ) ) )
                
            
            if self._max_allowed_bytes is not None and self._num_bytes_read > self._max_allowed_bytes:
                
                raise HydrusExceptions.NetworkException( 'The url exceeded the max network size for this type of job, which is {}!'.format( HydrusData.ToHumanBytes( self._max_allowed_bytes ) ) )
                
            
            if self._file_import_options is not None:
                
                is_complete_file_size = False
                
                self._file_import_options.CheckNetworkDownload( self._response_mime, self._num_bytes_read, is_complete_file_size )
                
            
        
        self._ReportDataUsed( chunk_num_bytes )
        self._WaitOnOngoingBandwidth()
        
        if HG.started_shutdown:
            
            raise HydrusExceptions.ShutdownException()
            
        
    
    # stick with GET for now. if there is a complex way to range-chunk a POST, we'll deal with it then, but I don't want to spam file uploads to IQDB by accident etc...
    download_is_definitely_incomplete = self._method == 'GET' and self._num_bytes_to_read is not None and self._num_bytes_read_is_accurate and self._num_bytes_read < self._num_bytes_to_read
    
    we_read_some_data = self._num_bytes_read > starting_num_bytes_read
    
    if download_is_definitely_incomplete and not we_read_some_data:
        
        # an empty chunk on an unfinished download: allow two in a row, fail on the third
        self._number_of_concurrent_empty_chunks += 1
        
        if self._number_of_concurrent_empty_chunks > 2:
            
            raise HydrusExceptions.NetworkException( 'The server appeared to want to send this URL in ranged chunks, but this chunk was empty!' )
            
        
        more_to_download = True
        
    else:
        
        self._number_of_concurrent_empty_chunks = 0
        
        more_to_download = we_read_some_data and download_is_definitely_incomplete
        
    
    if not more_to_download:
        
        if self._file_import_options is not None:
            
            is_complete_file_size = True
            
            self._file_import_options.CheckNetworkDownload( self._response_mime, self._num_bytes_read, is_complete_file_size )
            
        
    
    return more_to_download
2018-04-18 22:10:15 +00:00
def _ReportDataUsed( self, num_bytes ):
    """Report num_bytes of usage to this job's own tracker, then to the engine's bandwidth manager."""
    
    own_tracker = self._bandwidth_tracker
    
    own_tracker.ReportDataUsed( num_bytes )
    
    shared_manager = self.engine.bandwidth_manager
    
    shared_manager.ReportDataUsed( self._network_contexts, num_bytes )
2022-06-22 20:43:12 +00:00
def _ResetForAnotherAttempt( self ):
2022-06-22 20:43:12 +00:00
self._current_request_attempt_number += 1
self._content_type = None
self._response_mime = None
self._encoding = 'utf-8'
self._encoding_confirmed = False
self._stream_io = io.BytesIO()
self._num_bytes_read = 0
self._num_bytes_to_read = 1
self._num_bytes_read_is_accurate = True
2021-11-03 20:49:56 +00:00
self._number_of_concurrent_empty_chunks = 0
2022-06-22 20:43:12 +00:00
def _ResetForAnotherConnectionAttempt( self ):
self._ResetForAnotherAttempt()
self._current_connection_attempt_number += 1
self._current_request_attempt_number = 1
2021-09-22 21:12:34 +00:00
def _SendRequestAndGetResponse( self ) -> requests.Response:
    # Builds the headers for this job, sends the request through the session for this job's
    # session network context, and returns the response.
    # NOTE(review): block nesting, and so the extent of each lock, was reconstructed from a flattened listing. confirm against the real file
    
    with self._lock:
        
        # copy under the lock so the header lookup below works on a stable list
        ncs = list( self._network_contexts )
        
    
    headers = self.engine.domain_manager.GetHeaders( ncs )
    
    with self._lock:
        
        method = self._method
        url = self._url
        data = self._body
        files = self._files
        
        # hydrus and ipfs service jobs send a fixed hydrus User-Agent
        if self.IS_HYDRUS_SERVICE or self.IS_IPFS_SERVICE:
            
            headers[ 'User-Agent' ] = 'hydrus client/' + str( HC.NETWORK_VERSION )
            
        
        referral_url = self.engine.domain_manager.GetReferralURL( url, self._referral_url )
        
        url_class = self.engine.domain_manager.GetURLClass( url )
        
        if url_class is not None:
            
            headers.update( url_class.GetHeaderOverrides() )
            
        
        # for file or unknown urls, ask for everything from the byte we have reached
        if url_class is None or url_class.GetURLType() in ( HC.URL_TYPE_FILE, HC.URL_TYPE_UNKNOWN ):
            
            headers[ 'Range' ] = 'bytes={}-'.format( self._num_bytes_read )
            
        
        if HG.network_report_mode:
            
            HydrusData.ShowText( 'Network Jobs Referral URLs for {}:{}Given: {}{}Used: {}'.format( url, os.linesep, self._referral_url, os.linesep, referral_url ) )
            
        
        if referral_url is not None:
            
            try:
                
                # the encode result is discarded; this only tests whether the url fits in latin-1
                referral_url.encode( 'latin-1' )
                
            except UnicodeEncodeError:
                
                # quick and dirty way to quote this url when it comes here with full unicode chars. not perfect, but does the job
                # NOTE(review): only `urllib` is imported in the visible imports; urllib.parse is presumably loaded as a side effect of importing requests. confirm
                referral_url = urllib.parse.quote( referral_url, "!#$%&'()*+,/:;=?@[]~" )
                
                if HG.network_report_mode:
                    
                    HydrusData.ShowText( 'Network Jobs Quoted Referral URL for {}:{}{}'.format( url, os.linesep, referral_url ) )
                    
                
            
            headers[ 'referer' ] = referral_url
            
        
        # headers added through AddAdditionalHeader are applied last, so they win over everything above
        for ( key, value ) in self._additional_headers.items():
            
            headers[ key ] = value
            
        
        # only show this status when no bytes have been read yet
        if self._num_bytes_read == 0:
            
            self._status_text = 'sending request\u2026'
            
        
        snc = self._session_network_context
        
    
    session = self.engine.session_manager.GetSession( snc )
    
    ( connect_timeout, read_timeout ) = self._GetTimeouts()
    
    # stream = True is passed so the body can be read later in chunks (see _ReadResponse)
    response = session.request( method, url, data = data, files = files, headers = headers, stream = True, timeout = ( connect_timeout, read_timeout ) )
    
    with self._lock:
        
        # the request body counts towards bandwidth usage
        if self._body is not None:
            
            self._ReportDataUsed( len( self._body ) )
            
        
    
    return response
2018-04-18 22:10:15 +00:00
def _SetCancelled( self ):
self._is_cancelled = True
self._SetDone()
def _SetError( self, e, error ):
    """Store the exception e and its text error on the job, report them in network report mode, and mark the job done."""
    
    ( self._error_exception, self._error_text ) = ( e, error )
    
    report_mode_is_on = HG.network_report_mode
    
    if report_mode_is_on:
        
        HydrusData.ShowText( 'Network error should follow:' )
        HydrusData.ShowException( e )
        HydrusData.ShowText( error )
        
    
    self._SetDone()
def _SetDone( self ):
self._is_done = True
self._is_done_event.set()
def _Sleep( self, seconds_float ):
    """Set the wake time to seconds_float seconds from now; IsAsleep reports True until it has passed."""
    
    time_now = HydrusData.GetNowFloat()
    
    self._wake_time_float = time_now + seconds_float
2018-04-18 22:10:15 +00:00
2020-04-16 00:09:42 +00:00
def _SolveCloudFlare( self, response ):
    """
    Inspect response for a CloudFlare block or challenge and try to solve it with cloudscraper.
    
    Does nothing if cloudscraper is unavailable or the inspection itself fails. Raises
    HydrusExceptions.CloudFlareException for a firewall block or a failed solve, and
    HydrusExceptions.ShouldReattemptNetworkException once fresh tokens are stored in the session.
    """
    
    if not CLOUDSCRAPER_OK:
        
        return
        
    
    def first_positive_result( candidates ):
        
        # call each available detector on the response, stopping at the first truthy answer
        verdict = False
        
        for ( holder, detector_name ) in candidates:
            
            if holder is None or not hasattr( holder, detector_name ):
                
                continue
                
            
            verdict = getattr( holder, detector_name )( response )
            
            if verdict:
                
                break
                
            
        
        return verdict
        
    
    try:
        
        # cloudscraper refactored a bit around 1.2.60, so we now have some different paths to what we want
        
        old_module = getattr( cloudscraper, 'CloudScraper', None )
        
        new_module = getattr( getattr( cloudscraper, 'cloudflare', None ), 'Cloudflare', None )
        
        is_firewall = first_positive_result( [
            ( old_module, 'is_Firewall_Blocked' ),
            ( new_module, 'is_Firewall_Blocked' )
        ] )
        
        is_captcha = first_positive_result( [
            ( old_module, 'is_reCaptcha_Challenge' ),
            ( old_module, 'is_Captcha_Challenge' ),
            ( new_module, 'is_Captcha_Challenge' )
        ] )
        
        is_iuam = first_positive_result( [
            ( old_module, 'is_IUAM_Challenge' ),
            ( new_module, 'is_IUAM_Challenge' ),
            ( new_module, 'is_New_IUAM_Challenge' )
        ] )
        
        is_attemptable = is_captcha or is_iuam
        
    except Exception as e:
        
        HydrusData.Print( 'cloudflarescraper had an error looking at "{}" response: {}'.format( self._url, str( e ) ) )
        
        HydrusData.PrintException( e )
        
        return
        
    
    if is_firewall:
        
        raise HydrusExceptions.CloudFlareException( 'It looks like the site has Firewall-Blocked your IP or IP range with CloudFlare.' )
        
    
    if not is_attemptable:
        
        return
        
    
    try:
        
        with self._lock:
            
            ncs = list( self._network_contexts )
            
            snc = self._session_network_context
            
        
        headers = self.engine.domain_manager.GetHeaders( ncs )
        
        if 'User-Agent' not in headers:
            
            raise HydrusExceptions.CloudFlareException( 'No User-Agent set for hydrus!' )
            
        
        ( cf_tokens, solved_user_agent ) = cloudscraper.get_tokens( self._url, browser = { 'custom' : headers[ 'User-Agent' ] } )
        
        session = self.engine.session_manager.GetSession( snc )
        
        # clear any existing __cf cookies before adding the new tokens
        stale_cookies = [ cookie for cookie in session.cookies if cookie.name.startswith( '__cf' ) ]
        
        for stale_cookie in stale_cookies:
            
            session.cookies.clear( stale_cookie.domain, stale_cookie.path, stale_cookie.name )
            
        
        # the new cookies are set on the second-level domain with a thirty day expiry
        cookie_domain = '.{}'.format( ClientNetworkingFunctions.ConvertURLIntoSecondLevelDomain( self._url ) )
        cookie_expires = HydrusData.GetNow() + 30 * 86400
        cookie_rest = { 'HttpOnly' : None, 'SameSite' : 'None' }
        
        for ( name, value ) in cf_tokens.items():
            
            ClientNetworkingFunctions.AddCookieToSession( session, name, value, cookie_domain, '/', cookie_expires, secure = True, rest = cookie_rest )
            
        
        self.engine.session_manager.SetSessionDirty( snc )
        
    except Exception:
        
        raise HydrusExceptions.CloudFlareException( 'This looks like an unsolvable CloudFlare captcha! Best solution we know of is to copy cookies and User-Agent header from your web browser to hydrus!' )
        
    
    raise HydrusExceptions.ShouldReattemptNetworkException( 'CloudFlare needed solving.' )
2022-03-30 20:28:13 +00:00
def _WaitOnConnectionError( self, status_text: str ):
    """Block until the connection error delay has passed or the job is cancelled, then wait out any global traffic pause."""
    
    per_attempt_wait = HG.client_controller.new_options.GetInteger( 'connection_error_wait_time' )
    
    # the delay grows with each connection attempt; the first attempt has none
    previous_attempts = self._current_connection_attempt_number - 1
    
    self._connection_error_wake_time = HydrusData.GetNow() + ( previous_attempts * per_attempt_wait )
    
    while not ( HydrusData.TimeHasPassed( self._connection_error_wake_time ) or self._IsCancelled() ):
        
        with self._lock:
            
            self._status_text = '{} - retrying in {}'.format( status_text, ClientData.TimestampToPrettyTimeDelta( self._connection_error_wake_time ) )
            
        
        time.sleep( 1 )
        
    
    self._WaitOnNetworkTrafficPaused( status_text )
def _WaitOnNetworkTrafficPaused( self, status_text: str ):
    """Block, checking once a second, while all new network traffic is paused and the job is not cancelled."""
    
    while True:
        
        traffic_is_paused = HG.client_controller.new_options.GetBoolean( 'pause_all_new_network_traffic' )
        
        if not traffic_is_paused or self._IsCancelled():
            
            break
            
        
        with self._lock:
            
            self._status_text = '{} - now waiting because all network traffic is paused'.format( status_text )
            
        
        time.sleep( 1 )
2018-04-18 22:10:15 +00:00
def _WaitOnOngoingBandwidth( self ):
    """Sleep in 0.1 second steps until ongoing bandwidth is ok or the job is cancelled."""
    
    while not ( self._OngoingBandwidthOK() or self._IsCancelled() ):
        
        time.sleep( 0.1 )
2022-03-30 20:28:13 +00:00
def _WaitOnServersideBandwidth( self, status_text: str ):
    """Block until the serverside bandwidth delay has passed or the job is cancelled, then wait out any global traffic pause."""
    
    # 429 or 509 response from server. basically means 'I'm under big load mate'
    # a future version of this could def talk to domain manager and add a temp delay so other network jobs can be informed
    
    per_problem_wait = HG.client_controller.new_options.GetInteger( 'serverside_bandwidth_wait_time' )
    
    # the delay scales with the combined connection and request attempt counts
    problem_rating = ( self._current_connection_attempt_number + self._current_request_attempt_number ) - 1
    
    self._serverside_bandwidth_wake_time = HydrusData.GetNow() + ( problem_rating * per_problem_wait )
    
    while not ( HydrusData.TimeHasPassed( self._serverside_bandwidth_wake_time ) or self._IsCancelled() ):
        
        with self._lock:
            
            self._status_text = '{} - retrying in {}'.format( status_text, ClientData.TimestampToPrettyTimeDelta( self._serverside_bandwidth_wake_time ) )
            
        
        time.sleep( 1 )
        
    
    self._WaitOnNetworkTrafficPaused( status_text )
2019-11-28 01:11:46 +00:00
2018-04-18 22:10:15 +00:00
def AddAdditionalHeader( self, key, value ):
with self._lock:
self._additional_headers[ key ] = value
def BandwidthOK( self ):
with self._lock:
if self._ObeysBandwidth():
2020-06-17 21:31:54 +00:00
return self.engine.bandwidth_manager.CanDoWork( self._network_contexts )
2018-04-18 22:10:15 +00:00
else:
return True
2018-10-31 21:41:14 +00:00
def Cancel( self, status_text = None ):
2018-04-18 22:10:15 +00:00
with self._lock:
2018-10-31 21:41:14 +00:00
if status_text is None:
status_text = 'cancelled!'
self._status_text = status_text
2018-04-18 22:10:15 +00:00
self._SetCancelled()
def CanValidateInPopup( self ):
    """Return the domain manager's answer to CanValidateInPopup for this job's network contexts."""
    
    with self._lock:
        
        domain_manager = self.engine.domain_manager
        
        return domain_manager.CanValidateInPopup( self._network_contexts )
def CheckCanLogin( self ):
    """Return the login manager's CheckCanLogin result for this job's login context. Raises HydrusExceptions.ValidationException if this is itself a login job."""
    
    with self._lock:
        
        if not self._for_login:
            
            return self.engine.login_manager.CheckCanLogin( self._login_network_context )
            
        
        raise HydrusExceptions.ValidationException( 'Login jobs should not be asked if they can login!' )
2019-08-07 22:59:53 +00:00
def CurrentlyWaitingOnConnectionError( self ):
    """Return True while the connection error wake time has not yet passed."""
    
    with self._lock:
        
        wake_time_has_passed = HydrusData.TimeHasPassed( self._connection_error_wake_time )
        
        return not wake_time_has_passed
2019-11-28 01:11:46 +00:00
def CurrentlyWaitingOnServersideBandwidth( self ):
    """Return True while the serverside bandwidth wake time has not yet passed."""
    
    with self._lock:
        
        wake_time_has_passed = HydrusData.TimeHasPassed( self._serverside_bandwidth_wake_time )
        
        return not wake_time_has_passed
2020-04-16 00:09:42 +00:00
def DomainOK( self ):
with self._lock:
2022-06-22 20:43:12 +00:00
if self._this_is_a_one_shot_request:
2020-04-16 00:09:42 +00:00
return True
domain_ok = self.engine.domain_manager.DomainOK( self._url )
if not domain_ok:
self._status_text = 'This domain has had several serious errors recently. Waiting a bit.'
self._Sleep( 10 )
return domain_ok
2018-04-18 22:10:15 +00:00
def GenerateLoginProcess( self ):
with self._lock:
if self._for_login:
raise Exception( 'Login jobs should not be asked to generate login processes!' )
else:
return self.engine.login_manager.GenerateLoginProcess( self._login_network_context )
def GenerateValidationPopupProcess( self ):
    """Return the domain manager's validation popup process for this job's network contexts."""
    
    with self._lock:
        
        domain_manager = self.engine.domain_manager
        
        return domain_manager.GenerateValidationPopupProcess( self._network_contexts )
2020-12-16 22:29:51 +00:00
def GetActualFetchedURL( self ):
with self._lock:
return self._actual_fetched_url
2019-01-09 22:59:03 +00:00
def GetContentBytes( self ):
2018-04-18 22:10:15 +00:00
with self._lock:
self._stream_io.seek( 0 )
return self._stream_io.read()
2019-01-09 22:59:03 +00:00
def GetContentText( self ):
    """Return the content decoded as text, storing whichever encoding the decoder reports it used."""
    
    raw_content = self.GetContentBytes()
    
    decode_result = HydrusText.NonFailingUnicodeDecode( raw_content, self._encoding )
    
    ( text, self._encoding ) = decode_result
    
    return text
2018-04-18 22:10:15 +00:00
def GetContentType( self ):
with self._lock:
return self._content_type
def GetCreationTime( self ):
with self._lock:
return self._creation_time
2018-08-22 21:10:59 +00:00
def GetDomain( self ):
with self._lock:
return self._domain
2018-04-18 22:10:15 +00:00
def GetErrorException( self ):
with self._lock:
return self._error_exception
def GetErrorText( self ):
with self._lock:
return self._error_text
def GetLastModifiedTime( self ) -> typing.Optional[ int ]:
with self._lock:
return self._response_last_modified
2018-11-28 22:31:04 +00:00
def GetLoginNetworkContext( self ):
with self._lock:
return self._login_network_context
2018-04-18 22:10:15 +00:00
def GetNetworkContexts( self ):
with self._lock:
return list( self._network_contexts )
2018-08-22 21:10:59 +00:00
def GetSecondLevelDomain( self ):
with self._lock:
return self._second_level_domain
2018-10-17 21:00:09 +00:00
def GetSession( self ):
    """Return the session manager's session for this job's session network context."""
    
    with self._lock:
        
        session_context = self._session_network_context
        
    
    # the session lookup itself happens after the lock is released
    return self.engine.session_manager.GetSession( session_context )
2018-04-18 22:10:15 +00:00
def GetStatus( self ):
    """Return ( status_text, data usage from the tracker for a window of 1, num_bytes_read, num_bytes_to_read )."""
    
    with self._lock:
        
        recent_data_usage = self._bandwidth_tracker.GetUsage( HC.BANDWIDTH_TYPE_DATA, 1 )
        
        status = ( self._status_text, recent_data_usage, self._num_bytes_read, self._num_bytes_to_read )
        
    
    return status
def GetTotalDataUsed( self ):
    """Return the tracker's data usage for this job with no time window."""
    
    with self._lock:
        
        total_data_usage = self._bandwidth_tracker.GetUsage( HC.BANDWIDTH_TYPE_DATA, None )
        
    
    return total_data_usage
def GetURL( self ):
    """Return the job's stored URL, read under the job lock."""

    with self._lock:

        url = self._url

    return url
def HasError( self ):
    """Return True when an error exception has been stored on this job."""

    with self._lock:

        no_error = self._error_exception is None

    return not no_error
def IsAsleep( self ):
    """Return True while the job's wake time has not yet passed."""

    with self._lock:

        wake_time_reached = HydrusData.TimeHasPassedFloat( self._wake_time_float )

        return not wake_time_reached
2018-04-18 22:10:15 +00:00
def IsCancelled( self ):
    """Locked public wrapper around the private _IsCancelled check."""

    with self._lock:

        cancelled = self._IsCancelled()

    return cancelled
2022-05-18 20:18:25 +00:00
def IsCloudFlareCache( self ):
    """Return True when the stored response Server header is exactly 'cloudflare'."""

    with self._lock:

        server_header = self._response_server_header

        if server_header is None:

            return False

        return server_header == 'cloudflare'
2018-04-18 22:10:15 +00:00
def IsDone( self ):
    """Locked public wrapper around the private _IsDone check."""

    with self._lock:

        done = self._IsDone()

    return done
2018-11-28 22:31:04 +00:00
def IsHydrusJob( self ):
    """Return False: this base implementation does not identify as a hydrus job."""

    with self._lock:

        is_hydrus_job = False

    return is_hydrus_job
2018-04-18 22:10:15 +00:00
def IsValid( self ):
    """Ask the engine's domain manager whether this job's network contexts are valid."""

    with self._lock:

        domain_manager = self.engine.domain_manager

        return domain_manager.IsValid( self._network_contexts )
def NeedsLogin( self ):
    """Return whether this job must log in before running.

    A job flagged as being for login never needs login itself; otherwise the
    engine's login manager decides for this job's login network context.
    """

    with self._lock:

        if self._for_login:

            return False

        return self.engine.login_manager.NeedsLogin( self._login_network_context )
def NoEngineYet( self ):
    """Return True while no engine has been attached to this job."""

    engine_is_missing = self.engine is None

    return engine_is_missing
def ObeysBandwidth( self ):
    """Public wrapper around the private _ObeysBandwidth check (no lock is taken here)."""

    obeys_bandwidth = self._ObeysBandwidth()

    return obeys_bandwidth
2019-07-03 22:49:27 +00:00
def OnlyTryConnectionOnce( self ):
    """Flag this job as a one-shot request."""

    one_shot = True

    self._this_is_a_one_shot_request = one_shot
2019-07-03 22:49:27 +00:00
2018-04-18 22:10:15 +00:00
def OverrideBandwidth( self, delay = None ):
    """Set a manual bandwidth override, immediately or after a delay.

    With no delay the override flag is set and the wake time is zeroed. With a
    delay, the override timestamp is set to now + delay and the wake time is
    pulled in to at most one second after that timestamp.
    """

    with self._lock:

        if delay is not None:

            override_timestamp = HydrusData.GetNow() + delay

            self._bandwidth_manual_override_delayed_timestamp = override_timestamp

            self._wake_time_float = min( self._wake_time_float, override_timestamp + 1.0 )

            return

        self._bandwidth_manual_override = True

        self._wake_time_float = 0.0
2018-04-18 22:10:15 +00:00
2019-08-07 22:59:53 +00:00
def OverrideConnectionErrorWait( self ):
    """Zero the connection-error wake time."""

    with self._lock:

        no_wait = 0

        self._connection_error_wake_time = no_wait
2019-11-28 01:11:46 +00:00
def OverrideServersideBandwidthWait( self ):
    """Zero the serverside-bandwidth wake time."""

    with self._lock:

        no_wait = 0

        self._serverside_bandwidth_wake_time = no_wait
2018-09-19 21:54:51 +00:00
def OverrideToken( self ):
    """Mark the gallery token as already consumed and zero the wake time."""

    with self._lock:

        ( self._gallery_token_consumed, self._wake_time_float ) = ( True, 0.0 )
2018-09-19 21:54:51 +00:00
2021-10-06 20:59:30 +00:00
def ScrubDomainErrors( self ):
    """Ask the engine's domain manager to scrub domain errors for this job's URL, then zero the wake time."""

    with self._lock:

        domain_manager = self.engine.domain_manager

        domain_manager.ScrubDomainErrors( self._url )

        self._wake_time_float = 0.0
def SetError( self, e: Exception, error: str ):
    """Locked public wrapper that passes the exception and its text to the private _SetError."""

    with self._lock:

        error_args = ( e, error )

        self._SetError( *error_args )
def SetFiles( self, files ):
    """Store the given files object on the job."""

    with self._lock:

        new_files = files

        self._files = new_files
2018-06-06 21:27:02 +00:00
def SetFileImportOptions( self, file_import_options ):
    """Store the given file import options on the job."""

    with self._lock:

        new_options = file_import_options

        self._file_import_options = new_options
def SetForLogin( self, for_login: bool ):
    """Store whether this job is itself part of a login process."""

    with self._lock:

        new_value = for_login

        self._for_login = new_value
def SetGalleryToken( self, token_name: str ):
    """Store the name of the gallery token this job should use."""

    with self._lock:

        new_token_name = token_name

        self._gallery_token_name = new_token_name
def SetStatus( self, text: str ):
    """Replace the job's status text."""

    with self._lock:

        new_status = text

        self._status_text = new_status
def Sleep( self, seconds ):
    """Locked public wrapper that passes the duration to the private _Sleep."""

    with self._lock:

        duration = seconds

        self._Sleep( duration )
def Start( self ):
# Run this job: mark it started, then keep sending the request until one attempt completes or the job is cancelled.
# Retryable failures wait and loop again; anything that escapes is stored via _SetError, and _SetDone is always called.
# NOTE(review): indentation is not visible in this view of the file, so the nesting described in these comments is inferred from the statements--confirm against the real file.
try:
2018-09-12 21:36:26 +00:00
with self._lock:
2018-10-17 21:00:09 +00:00
self._is_started = True
2019-01-09 22:59:03 +00:00
self._status_text = 'job started'
2018-09-12 21:36:26 +00:00
2018-04-18 22:10:15 +00:00
# only set True after an attempt runs to the end, so every retry handler below leads to another pass of this loop
request_completed = False
while not request_completed:
2019-12-11 23:18:37 +00:00
if self._IsCancelled():
2018-04-18 22:10:15 +00:00
2019-12-11 23:18:37 +00:00
return
# stays None if the send itself raises, which the cleanup code checks before closing
response = None
try:
2019-06-19 22:08:48 +00:00
2018-04-18 22:10:15 +00:00
response = self._SendRequestAndGetResponse()
2020-12-16 22:29:51 +00:00
# I think tbh I would rather tell requests not to do 3XX, which is possible with allow_redirects = False on request, and then just raise various 3XX exceptions with url info, so I can requeue easier and keep a record
# figuring out correct new url seems a laugh, requests has slight helpers, but lots of exceptions
# SessionRedirectMixin here https://requests.readthedocs.io/en/latest/_modules/requests/sessions/
# but this will do as a patch for now
self._actual_fetched_url = response.url
2021-01-20 22:22:03 +00:00
if self._actual_fetched_url != self._url and HG.network_report_mode:
HydrusData.ShowText( 'Network Jobs Redirect: {} -> {}'.format( self._url, self._actual_fetched_url ) )
self._ParseFirstResponseHeaders( response )
2018-06-06 21:27:02 +00:00
2018-04-18 22:10:15 +00:00
# success path: read the body into the chosen destination
if response.ok:
with self._lock:
2019-01-09 22:59:03 +00:00
self._status_text = 'downloading\u2026'
# no temp path means the body goes to the in-memory stream, otherwise a file is opened for binary writing
if self._temp_path is None:
2019-01-09 22:59:03 +00:00
stream_dest = self._stream_io
else:
stream_dest = open( self._temp_path, 'wb' )
2018-04-18 22:10:15 +00:00
try:
2018-04-18 22:10:15 +00:00
more_to_download = True
2018-04-18 22:10:15 +00:00
# _ReadResponse reports whether another part is still needed; each extra part is a fresh request
while more_to_download:
more_to_download = self._ReadResponse( response, stream_dest )
if more_to_download:
with self._lock:
self._status_text = 'downloading next part\u2026'
# this will magically have new Range header
response = self._SendRequestAndGetResponse()
if not response.ok:
raise HydrusExceptions.NetworkException( 'Ranged response failed {}'.format( response.status_code ) )
2018-04-18 22:10:15 +00:00
finally:
# only the file opened above is closed here; the in-memory stream is left alone
if self._temp_path is not None:
2018-04-18 22:10:15 +00:00
stream_dest.close()
2018-04-18 22:10:15 +00:00
with self._lock:
# we are complete here and worked ok
self._GenerateModifiedDate( response )
2022-05-18 20:18:25 +00:00
if 'Server' in response.headers:
self._response_server_header = response.headers[ 'Server' ]
2018-04-18 22:10:15 +00:00
self._status_text = 'done!'
# failure path: the status code and body are converted into an exception
else:
with self._lock:
self._status_text = str( response.status_code ) + ' - ' + str( response.reason )
2020-04-16 00:09:42 +00:00
# it is important we do this before ReadResponse, as the CF test needs r.text, which is nullified if we first access with iter_content
if not self._we_tried_cloudflare_once:
self._we_tried_cloudflare_once = True
self._SolveCloudFlare( response )
# don't care about 'more_to_download' here. lmao if some server ever tried to pull it off anyway
self._ReadResponse( response, self._stream_io )
data = self.GetContentBytes()
2018-04-18 22:10:15 +00:00
with self._lock:
( e, error_text ) = ConvertStatusCodeAndDataIntoExceptionInfo( response.status_code, data, self.IS_HYDRUS_SERVICE )
2020-04-22 21:00:35 +00:00
# these two types are raised so the matching retry handlers below deal with them; any other type is stored as the job's error
if isinstance( e, ( HydrusExceptions.BandwidthException, HydrusExceptions.ShouldReattemptNetworkException ) ):
2019-10-02 23:38:59 +00:00
raise e
2018-04-18 22:10:15 +00:00
self._SetError( e, error_text )
request_completed = True
except HydrusExceptions.CancelledException:
with self._lock:
self._status_text = 'Cancelled!'
return
2019-10-02 23:38:59 +00:00
# the retry handlers below share one shape: reset for another attempt, raise if no reattempt is allowed, otherwise wait
except HydrusExceptions.BandwidthException as e:
2022-06-22 20:43:12 +00:00
self._ResetForAnotherAttempt()
2019-10-02 23:38:59 +00:00
2020-04-16 00:09:42 +00:00
if self._CanReattemptRequest():
2019-10-02 23:38:59 +00:00
2020-04-16 00:09:42 +00:00
self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
else:
raise HydrusExceptions.BandwidthException( 'Server reported very limited bandwidth: ' + str( e ) )
2019-10-02 23:38:59 +00:00
2019-11-28 01:11:46 +00:00
self._WaitOnServersideBandwidth( 'server reported limited bandwidth' )
2019-10-02 23:38:59 +00:00
2018-04-25 22:07:52 +00:00
except HydrusExceptions.ShouldReattemptNetworkException as e:
2022-06-22 20:43:12 +00:00
self._ResetForAnotherAttempt()
2018-04-25 22:07:52 +00:00
if not self._CanReattemptRequest():
2020-04-16 00:09:42 +00:00
raise HydrusExceptions.NetworkInfrastructureException( 'Ran out of reattempts on this error: ' + str( e ) )
2018-04-25 22:07:52 +00:00
2020-04-16 00:09:42 +00:00
self._WaitOnConnectionError( str( e ) )
2018-04-25 22:07:52 +00:00
2018-04-18 22:10:15 +00:00
except requests.exceptions.ChunkedEncodingError:
2022-06-22 20:43:12 +00:00
self._ResetForAnotherAttempt()
2018-04-18 22:10:15 +00:00
if not self._CanReattemptRequest():
2020-04-16 00:09:42 +00:00
raise HydrusExceptions.StreamTimeoutException( 'Unable to complete request--it broke mid-way!' )
2018-04-18 22:10:15 +00:00
2019-06-19 22:08:48 +00:00
self._WaitOnConnectionError( 'connection broke mid-request' )
2018-04-18 22:10:15 +00:00
# SSL problems are reported and raised with no reattempt
except requests.exceptions.SSLError as e:
# note a requests SSLError is a ConnectionError, so careful about catching order here
self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
raise HydrusExceptions.ConnectionException( 'Problem with SSL: {}'.format( str( e ) ) )
2018-04-18 22:10:15 +00:00
# connection failures use the connection-specific reset and reattempt check rather than the request ones
except ( requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout ):
self._ResetForAnotherConnectionAttempt()
2018-04-18 22:10:15 +00:00
2020-04-16 00:09:42 +00:00
if self._CanReattemptConnection():
self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
else:
2018-04-18 22:10:15 +00:00
raise HydrusExceptions.ConnectionException( 'Could not connect!' )
2019-06-19 22:08:48 +00:00
self._WaitOnConnectionError( 'connection failed' )
2018-04-18 22:10:15 +00:00
except requests.exceptions.ReadTimeout:
2022-06-22 20:43:12 +00:00
self._ResetForAnotherAttempt()
2018-04-18 22:10:15 +00:00
if not self._CanReattemptRequest():
2020-04-16 00:09:42 +00:00
raise HydrusExceptions.StreamTimeoutException( 'Connection successful, but reading response timed out!' )
2018-04-18 22:10:15 +00:00
2019-06-19 22:08:48 +00:00
self._WaitOnConnectionError( 'read timed out' )
2018-04-18 22:10:15 +00:00
2021-05-05 20:12:11 +00:00
# catch-all: one known error message is treated like a connection failure, everything else is re-raised untouched
except Exception as e:
if '\'Retry\' has no attribute' in str( e ):
# this is that weird requests 2.25.x(?) urllib3 maybe thread safety error
# we'll just try and pause a bit I guess!
self._ResetForAnotherConnectionAttempt()
2021-05-05 20:12:11 +00:00
if self._CanReattemptConnection():
self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
else:
raise HydrusExceptions.ConnectionException( 'Could not connect!' )
self._WaitOnConnectionError( 'connection failed, and could not recover neatly' )
else:
raise
2019-12-11 23:18:37 +00:00
# per-attempt cleanup: flag the session as dirty and close whatever response is currently held
finally:
2021-01-13 21:48:58 +00:00
with self._lock:
snc = self._session_network_context
self.engine.session_manager.SetSessionDirty( snc )
2019-12-11 23:18:37 +00:00
if response is not None:
# if full data was not read, the response will hang around in connection pool longer than we want
# so just an explicit close here
response.close()
2018-04-18 22:10:15 +00:00
# anything raised above lands here and becomes the job's stored error, with the traceback as its text
except Exception as e:
with self._lock:
trace = traceback.format_exc()
# these three exception types are not printed to the log; all others are
if not isinstance( e, ( HydrusExceptions.NetworkInfrastructureException, HydrusExceptions.StreamTimeoutException, HydrusExceptions.FileImportRulesException ) ):
2019-01-09 22:59:03 +00:00
HydrusData.Print( trace )
2020-04-16 00:09:42 +00:00
if isinstance( e, HydrusExceptions.NetworkInfrastructureException ):
self.engine.domain_manager.ReportNetworkInfrastructureError( self._url )
2019-01-09 22:59:03 +00:00
self._status_text = 'Error: ' + str( e )
2018-04-18 22:10:15 +00:00
self._SetError( e, trace )
# the job is marked done on every path out of this method
finally:
with self._lock:
self._SetDone()
def TokensOK( self ) -> bool:
# Return True when this job may proceed as far as gallery tokens are concerned.
# If a gallery token name is set and not yet consumed, this tries to consume one from the engine's bandwidth manager.
# When that fails, the status text is updated, the job sleeps 0.8 and False is returned.
# NOTE(review): indentation is not visible in this view of the file, so the nesting of the statements is inferred--confirm against the real file.
2018-08-22 21:10:59 +00:00
with self._lock:
2018-09-12 21:36:26 +00:00
# a token is only needed when a token name is set and it has not been consumed yet
need_token = self._gallery_token_name is not None and not self._gallery_token_consumed
# copied to locals so the bandwidth manager call below does not read job state directly
sld = self._second_level_domain
gtn = self._gallery_token_name
if need_token:
2018-10-31 21:41:14 +00:00
( consumed, next_timestamp ) = self.engine.bandwidth_manager.TryToConsumeAGalleryToken( sld, gtn )
2018-09-12 21:36:26 +00:00
with self._lock:
2018-08-22 21:10:59 +00:00
if consumed:
self._status_text = 'starting soon'
2018-09-12 21:36:26 +00:00
2018-08-22 21:10:59 +00:00
self._gallery_token_consumed = True
else:
# within 3 of the previous estimate having passed, say another job got the token instead of showing a new countdown
if HydrusData.TimeHasPassed( self._last_gallery_token_estimate ) and not HydrusData.TimeHasPassed( self._last_gallery_token_estimate + 3 ):
self._status_text = 'a different {} got the chance to work'.format( self._gallery_token_name )
else:
self._status_text = 'waiting to start: {}'.format( ClientData.TimestampToPrettyTimeDelta( next_timestamp, just_now_threshold = 2, just_now_string = 'checking', no_prefix = True ) )
# NOTE(review): whether this assignment belongs to the 'waiting to start' branch only or to both branches cannot be told from this view
self._last_gallery_token_estimate = next_timestamp
2018-08-22 21:10:59 +00:00
self._Sleep( 0.8 )
2018-08-22 21:10:59 +00:00
return False
2018-09-12 21:36:26 +00:00
return True
2018-08-22 21:10:59 +00:00
2020-06-17 21:31:54 +00:00
def TryToStartBandwidth( self ):
# Try to start this job's request as far as bandwidth rules are concerned.
# A job that does not obey bandwidth reports a used request to its own tracker and to the engine's bandwidth manager, and returns True.
# Otherwise the result of bandwidth_manager.TryToStartRequest is returned; on a refusal, a waiting status text is built and the job may sleep.
# NOTE(review): indentation is not visible in this view of the file, so the nesting of the statements is inferred--confirm against the real file.
with self._lock:
if self._ObeysBandwidth():
result = self.engine.bandwidth_manager.TryToStartRequest( self._network_contexts )
if result:
self._bandwidth_tracker.ReportRequestUsed()
else:
( bandwidth_waiting_duration, bandwidth_network_context ) = self.engine.bandwidth_manager.GetWaitingEstimateAndContext( self._network_contexts )
# a delayed manual override may be scheduled; work out whether it arrives before the bandwidth estimate does
will_override = self._bandwidth_manual_override_delayed_timestamp is not None
override_coming_first = False
if will_override:
override_waiting_duration = self._bandwidth_manual_override_delayed_timestamp - HydrusData.GetNow()
override_coming_first = override_waiting_duration < bandwidth_waiting_duration
just_now_threshold = 2
if override_coming_first:
waiting_duration = override_waiting_duration
waiting_str = 'overriding bandwidth ' + ClientData.TimestampToPrettyTimeDelta( self._bandwidth_manual_override_delayed_timestamp, just_now_string = 'imminently', just_now_threshold = just_now_threshold )
2020-06-17 21:31:54 +00:00
else:
waiting_duration = bandwidth_waiting_duration
bandwidth_time_estimate = HydrusData.GetNow() + waiting_duration
# within 3 of the previous estimate having passed, say another job got the bandwidth instead of showing a new countdown
if HydrusData.TimeHasPassed( self._last_bandwidth_time_estimate ) and not HydrusData.TimeHasPassed( self._last_bandwidth_time_estimate + 3 ):
waiting_str = 'a different network job got the bandwidth'
else:
waiting_str = 'bandwidth free ' + ClientData.TimestampToPrettyTimeDelta( bandwidth_time_estimate, just_now_string = 'imminently', just_now_threshold = just_now_threshold )
# NOTE(review): which branch(es) the next two statements belong to cannot be told from this view
self._last_bandwidth_time_estimate = bandwidth_time_estimate
2020-06-17 21:31:54 +00:00
waiting_str += '\u2026 (' + bandwidth_network_context.ToHumanString() + ')'
self._status_text = waiting_str
# sleep longer the further away the estimate is; waits of 10 or less do not sleep here
if waiting_duration > 1200:
self._Sleep( 30 )
elif waiting_duration > 120:
self._Sleep( 10 )
elif waiting_duration > 10:
self._Sleep( 0.8 )
2020-06-17 21:31:54 +00:00
return result
else:
self._bandwidth_tracker.ReportRequestUsed()
self.engine.bandwidth_manager.ReportRequestUsed( self._network_contexts )
return True
2018-04-18 22:10:15 +00:00
def WaitUntilDone( self ):
    """Block until the job reports done, then raise if it did not finish cleanly.

    Raises ShutdownException when shutdown has started or this thread is shutting
    down, the stored error exception when there is one, and CancelledException
    when the job was cancelled. Returns None otherwise.
    """

    # the done event is waited on in 5 second slices, with IsDone re-checked between slices
    while not self.IsDone():

        self._is_done_event.wait( 5 )

    with self._lock:

        shutting_down = HG.started_shutdown or HydrusThreading.IsThreadShuttingDown()

        if shutting_down:

            raise HydrusExceptions.ShutdownException()

        stored_error = self._error_exception

        if stored_error is not None:

            if isinstance( stored_error, Exception ):

                raise stored_error

            # whatever was stored is not raisable as-is, so raise a generic error instead
            raise Exception( 'Problem in network error handling.' )

        if self._IsCancelled():

            prefix = 'Upload cancelled: ' if self._method == 'POST' else 'Download cancelled: '

            raise HydrusExceptions.CancelledException( prefix + self._status_text )
def WillingToWaitOnInvalidLogin( self ) -> bool:
    """Return the WILLING_TO_WAIT_ON_INVALID_LOGIN value visible on this job."""

    willing_to_wait = self.WILLING_TO_WAIT_ON_INVALID_LOGIN

    return willing_to_wait
2018-04-18 22:10:15 +00:00
class NetworkJobDownloader( NetworkJob ):
    """NetworkJob that also carries a downloader-page network context built from a page key."""

    def __init__( self, downloader_page_key, method, url, body = None, referral_url = None, temp_path = None ):

        # stored before the base constructor is called
        self._downloader_page_key = downloader_page_key

        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )

    def _GenerateNetworkContexts( self ):
        """Return the base job's network contexts plus the context for this downloader page."""

        contexts = NetworkJob._GenerateNetworkContexts( self )

        page_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOWNLOADER_PAGE, self._downloader_page_key )

        contexts.append( page_context )

        return contexts
class NetworkJobSubscription( NetworkJob ):
    """NetworkJob that also carries a subscription network context built from a subscription key."""

    WILLING_TO_WAIT_ON_INVALID_LOGIN = False

    def __init__( self, subscription_key, method, url, body = None, referral_url = None, temp_path = None ):

        # stored before the base constructor is called
        self._subscription_key = subscription_key

        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )

    def _GenerateNetworkContexts( self ):
        """Return the base job's network contexts plus the context for this subscription."""

        contexts = NetworkJob._GenerateNetworkContexts( self )

        subscription_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_SUBSCRIPTION, self._subscription_key )

        contexts.append( subscription_context )

        return contexts
2022-03-30 20:28:13 +00:00
def CheckHydrusVersion( service_type, response ):
    """Check that a response came from the expected hydrus service at our network version.

    The 'server' header is tried first and 'hydrus-server' second; the header
    must contain the service string for the given service type and is expected
    to look like '<service string>/<network version>'.

    Raises WrongServiceTypeException when neither header names the service, and
    NetworkVersionException when the version cannot be parsed or does not equal
    HC.NETWORK_VERSION.
    """

    service_string = HC.service_string_lookup[ service_type ]

    headers = response.headers

    if 'server' in headers and service_string in headers[ 'server' ]:

        server_header = headers[ 'server' ]

    elif 'hydrus-server' in headers and service_string in headers[ 'hydrus-server' ]:

        server_header = headers[ 'hydrus-server' ]

    else:

        raise HydrusExceptions.WrongServiceTypeException( 'Target was not a ' + service_string + '!' )

    # the header is sent by the remote end, so a missing slash, an extra slash, or a non-numeric
    # version must produce a readable error rather than a bare ValueError from unpacking or int()
    ( service_string_gumpf, separator, network_version_text ) = server_header.partition( '/' )

    try:

        if separator == '':

            raise ValueError( 'no version separator' )

        network_version = int( network_version_text )

    except ValueError as e:

        raise HydrusExceptions.NetworkVersionException( 'Could not parse a network version from the server header "' + str( server_header ) + '"!' ) from e

    if network_version == HC.NETWORK_VERSION:

        return

    if network_version > HC.NETWORK_VERSION:

        message = 'Your client is out of date; please download the latest release.'

    else:

        message = 'The server is out of date; please ask its admin to update to the latest release.'

    raise HydrusExceptions.NetworkVersionException( 'Network version mismatch! The server\'s network version was ' + str( network_version ) + ', whereas your client\'s is ' + str( HC.NETWORK_VERSION ) + '! ' + message )
2018-04-18 22:10:15 +00:00
2022-03-30 20:28:13 +00:00
class NetworkJobHydrus( NetworkJob ):
    """NetworkJob aimed at a hydrus service, identified by its service key.

    Requests and data are also reported to the service's account when the
    service type is in HC.RESTRICTED_SERVICES, and successful responses from
    such services are checked with CheckHydrusVersion.
    """

    WILLING_TO_WAIT_ON_INVALID_LOGIN = False
    IS_HYDRUS_SERVICE = True

    def __init__( self, service_key, method, url, body = None, referral_url = None, temp_path = None ):

        # stored before the base constructor is called
        self._service_key = service_key

        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )

    def _GenerateNetworkContexts( self ):
        """Return the global network context plus the hydrus context for this service key."""

        network_contexts = [
            ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT,
            ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_HYDRUS, self._service_key )
        ]

        return network_contexts

    def _GenerateSpecificNetworkContexts( self ):
        """Return ( session context, login context ), both the same hydrus context for this service key."""

        # we store cookies on and login to the same hydrus-specific context

        session_network_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_HYDRUS, self._service_key )
        login_network_context = session_network_context

        return ( session_network_context, login_network_context )

    def _ReportDataUsed( self, num_bytes ):
        """Report used bytes to the service's account (restricted services only), then to the base job."""

        service = self.engine.controller.services_manager.GetService( self._service_key )

        service_type = service.GetServiceType()

        if service_type in HC.RESTRICTED_SERVICES:

            account = service.GetAccount()

            account.ReportDataUsed( num_bytes )

        NetworkJob._ReportDataUsed( self, num_bytes )

    def _SendRequestAndGetResponse( self ) -> requests.Response:
        """Send the request through the base job, with account reporting and a version check.

        For restricted services, a used request is reported to the account
        before sending, and an ok response is passed to CheckHydrusVersion.
        Whatever CheckHydrusVersion raises is re-raised after the response is closed.
        """

        service = self.engine.controller.services_manager.GetService( self._service_key )

        service_type = service.GetServiceType()

        if service_type in HC.RESTRICTED_SERVICES:

            account = service.GetAccount()

            account.ReportRequestUsed()

        response = NetworkJob._SendRequestAndGetResponse( self )

        if response.ok and service_type in HC.RESTRICTED_SERVICES:

            try:

                CheckHydrusVersion( service_type, response )

            except Exception:

                # the response is never returned when the check fails, so no caller can close it
                # close it here so it does not linger unclosed
                response.close()

                raise

        return response

    def IsHydrusJob( self ):
        """Return True: this job targets a hydrus service."""

        with self._lock:

            return True
2020-05-13 19:03:16 +00:00
class NetworkJobIPFS( NetworkJob ):
    """NetworkJob for IPFS: always a POST, tries the connection only once, and overrides bandwidth."""

    IS_IPFS_SERVICE = True

    def __init__( self, url, body = None, referral_url = None, temp_path = None ):

        NetworkJob.__init__( self, 'POST', url, body = body, referral_url = referral_url, temp_path = temp_path )

        self.OnlyTryConnectionOnce()
        self.OverrideBandwidth()

    def _GetTimeouts( self ):
        """Return the base timeouts with the read timeout raised to at least 7200."""

        base_timeouts = NetworkJob._GetTimeouts( self )

        ( connect_timeout, base_read_timeout ) = base_timeouts

        return ( connect_timeout, max( 7200, base_read_timeout ) )
2020-05-13 19:03:16 +00:00
2018-04-18 22:10:15 +00:00
class NetworkJobWatcherPage( NetworkJob ):
    """NetworkJob that also carries a watcher-page network context built from a watcher key."""

    def __init__( self, watcher_key, method, url, body = None, referral_url = None, temp_path = None ):

        # stored before the base constructor is called
        self._watcher_key = watcher_key

        NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path )

    def _GenerateNetworkContexts( self ):
        """Return the base job's network contexts plus the context for this watcher page."""

        contexts = NetworkJob._GenerateNetworkContexts( self )

        watcher_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_WATCHER_PAGE, self._watcher_key )

        contexts.append( watcher_context )

        return contexts