import calendar import io import os import typing import requests import threading import traceback import time import urllib from hydrus.core import HydrusConstants as HC from hydrus.core import HydrusData from hydrus.core import HydrusExceptions from hydrus.core import HydrusGlobals as HG from hydrus.core import HydrusThreading from hydrus.core import HydrusText from hydrus.core.networking import HydrusNetworking from hydrus.client import ClientConstants as CC from hydrus.client import ClientData from hydrus.client import ClientTime from hydrus.client.networking import ClientNetworkingContexts from hydrus.client.networking import ClientNetworkingFunctions try: import cloudscraper CLOUDSCRAPER_OK = True try: # help pyinstaller import pyparsing PYPARSING_OK = True except: PYPARSING_OK = False except: CLOUDSCRAPER_OK = False PYPARSING_OK = False def ConvertStatusCodeAndDataIntoExceptionInfo( status_code, data, is_hydrus_service = False ): ( error_text, encoding ) = HydrusText.NonFailingUnicodeDecode( data, 'utf-8' ) print_long_error_text = True if status_code == 304: print_long_error_text = False eclass = HydrusExceptions.NotModifiedException elif status_code == 400: eclass = HydrusExceptions.BadRequestException elif status_code == 401: eclass = HydrusExceptions.MissingCredentialsException elif status_code == 403: eclass = HydrusExceptions.InsufficientCredentialsException elif status_code == 404: print_long_error_text = False eclass = HydrusExceptions.NotFoundException elif status_code == 406: eclass = HydrusExceptions.NotAcceptable elif status_code == 409: eclass = HydrusExceptions.ConflictException elif status_code == 416: eclass = HydrusExceptions.RangeNotSatisfiableException elif status_code == 419: eclass = HydrusExceptions.SessionException elif status_code == 426: eclass = HydrusExceptions.NetworkVersionException elif status_code == 429: eclass = HydrusExceptions.BandwidthException elif status_code == 509: eclass = HydrusExceptions.BandwidthException elif 
status_code == 502: eclass = HydrusExceptions.ShouldReattemptNetworkException elif status_code == 503: if is_hydrus_service: eclass = HydrusExceptions.ServerBusyException else: eclass = HydrusExceptions.ShouldReattemptNetworkException elif status_code >= 500: eclass = HydrusExceptions.ServerException else: eclass = HydrusExceptions.NetworkException if len( error_text ) > 1024 and print_long_error_text: large_chunk = error_text[ : 512 * 1024 ] smaller_chunk = large_chunk[:256] HydrusData.DebugPrint( large_chunk ) error_text = 'The server\'s error text was too long to display. The first part follows, while a larger chunk has been written to the log.' error_text += os.linesep error_text += smaller_chunk e = eclass( '{}: {}'.format( status_code, error_text ) ) return ( e, error_text ) class NetworkJob( object ): WILLING_TO_WAIT_ON_INVALID_LOGIN = True IS_HYDRUS_SERVICE = False IS_IPFS_SERVICE = False def __init__( self, method: str, url: str, body = None, referral_url = None, temp_path = None ): if body is not None and isinstance( body, str ): body = bytes( body, 'utf-8' ) self.engine = None self._lock = threading.Lock() self._method = method self._url = url self._current_connection_attempt_number = 1 self._current_request_attempt_number = 1 self._this_is_a_one_shot_request = False self._we_tried_cloudflare_once = False self._domain = ClientNetworkingFunctions.ConvertURLIntoDomain( self._url ) self._second_level_domain = ClientNetworkingFunctions.ConvertURLIntoSecondLevelDomain( self._url ) self._body = body self._referral_url = referral_url self._actual_fetched_url = self._url self._temp_path = temp_path self._response_server_header = None self._response_last_modified = None if self._temp_path is None: # 100MB HTML file lmao self._max_allowed_bytes = 104857600 else: self._max_allowed_bytes = None self._files = None self._for_login = False self._additional_headers = {} self._creation_time = HydrusData.GetNow() self._bandwidth_tracker = 
HydrusNetworking.BandwidthTracker() self._connection_error_wake_time = 0 self._serverside_bandwidth_wake_time = 0 self._wake_time_float = 0.0 self._content_type = None self._response_mime = None self._encoding = 'utf-8' self._encoding_confirmed = False self._stream_io = io.BytesIO() self._error_exception = Exception( 'Exception not initialised.' ) # PyLint hint, wew self._error_exception = None self._error_text = None self._is_done_event = threading.Event() self._is_started = False self._is_done = False self._is_cancelled = False self._gallery_token_name = None self._gallery_token_consumed = False self._last_gallery_token_estimate = 0 self._bandwidth_manual_override = False self._bandwidth_manual_override_delayed_timestamp = None self._last_bandwidth_time_estimate = 0 self._last_time_ongoing_bandwidth_failed = 0 self._status_text = 'initialising\u2026' self._num_bytes_read = 0 self._num_bytes_to_read = 1 self._num_bytes_read_is_accurate = True self._number_of_concurrent_empty_chunks = 0 self._file_import_options = None self._network_contexts = self._GenerateNetworkContexts() ( self._session_network_context, self._login_network_context ) = self._GenerateSpecificNetworkContexts() def _CanReattemptConnection( self ): if self._this_is_a_one_shot_request: return False max_connection_attempts_allowed = HG.client_controller.new_options.GetInteger( 'max_connection_attempts_allowed' ) return self._current_connection_attempt_number <= max_connection_attempts_allowed def _CanReattemptRequest( self ): if self._this_is_a_one_shot_request: return False if self._method == 'GET': max_attempts_allowed = HG.client_controller.new_options.GetInteger( 'max_request_attempts_allowed_get' ) else: max_attempts_allowed = 1 return self._current_request_attempt_number <= max_attempts_allowed def _GenerateModifiedDate( self, response: requests.Response ): if 'Last-Modified' in response.headers: # Thu, 20 May 2010 07:00:23 GMT # these are always in GMT last_modified_string = response.headers[ 
'Last-Modified' ] if last_modified_string.endswith( ' GMT' ): last_modified_string = last_modified_string[:-4] try: struct_time = time.strptime( last_modified_string, '%a, %d %b %Y %H:%M:%S' ) # the given struct is in GMT, so calendar.timegm is appropriate here last_modified_time = int( calendar.timegm( struct_time ) ) if ClientTime.TimestampIsSensible( last_modified_time ): self._response_last_modified = last_modified_time except: pass def _GenerateNetworkContexts( self ): network_contexts = [ ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT ] domains = ClientNetworkingFunctions.ConvertDomainIntoAllApplicableDomains( self._domain ) network_contexts.extend( ( ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, domain ) for domain in domains ) ) return network_contexts def _GenerateSpecificNetworkContexts( self ): # we always store cookies in the larger session (even if the cookie itself refers to a subdomain in the session object) # but we can login to a specific subdomain session_network_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, self._second_level_domain ) login_network_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, self._domain ) return ( session_network_context, login_network_context ) def _GetTimeouts( self ): connect_timeout = HG.client_controller.new_options.GetInteger( 'network_timeout' ) read_timeout = connect_timeout * 6 return ( connect_timeout, read_timeout ) def _IsCancelled( self ): if self._is_cancelled: return True if HG.started_shutdown: return True return False def _IsDone( self ): if self._is_done: return True if HG.started_shutdown or HydrusThreading.IsThreadShuttingDown(): return True return False def _ObeysBandwidth( self ): if self._bandwidth_manual_override: return False if self._bandwidth_manual_override_delayed_timestamp is not None and HydrusData.TimeHasPassed( self._bandwidth_manual_override_delayed_timestamp ): return False if self._method == 'POST': return 
False if self._for_login: return False return True def _OngoingBandwidthOK( self ): now = HydrusData.GetNow() if now == self._last_time_ongoing_bandwidth_failed: # it won't have changed, so no point spending any cpu checking return False else: result = self.engine.bandwidth_manager.CanContinueDownload( self._network_contexts ) if not result: self._last_time_ongoing_bandwidth_failed = now return result def _ParseFirstResponseHeaders( self, response: requests.Response ): with self._lock: if 'Content-Type' in response.headers: self._content_type = response.headers[ 'Content-Type' ] if self._content_type is not None and self._content_type in HC.mime_enum_lookup: self._response_mime = HC.mime_enum_lookup[ self._content_type ] else: self._response_mime = None if 'content-length' in response.headers: self._num_bytes_to_read = int( response.headers[ 'content-length' ] ) else: self._num_bytes_to_read = None if response.encoding is not None: self._encoding = response.encoding if response.ok: # i.e. 
we got what we expected, not some error if 'content-length' in response.headers: if self._max_allowed_bytes is not None and self._num_bytes_to_read > self._max_allowed_bytes: raise HydrusExceptions.NetworkException( 'The url was apparently {} but the max network size for this type of job is {}!'.format( HydrusData.ToHumanBytes( self._num_bytes_to_read ), HydrusData.ToHumanBytes( self._max_allowed_bytes ) ) ) if self._file_import_options is not None: is_complete_file_size = True self._file_import_options.CheckNetworkDownload( self._response_mime, self._num_bytes_to_read, is_complete_file_size ) def _ReadResponse( self, response: requests.Response, stream_dest ): if 'content-range' in response.headers: content_range = response.headers[ 'content-range' ] # Content-Range: -/ # range and size can be * if content_range.startswith( 'bytes ' ): content_range = content_range[6:] if '/' in content_range: ( byte_range, size ) = content_range.split( '/', 1 ) if byte_range != '*' and '-' in byte_range: ( byte_start, byte_end ) = byte_range.split( '-', 1 ) try: byte_start = int( byte_start ) if byte_start != self._num_bytes_read: # this server be crazy # I guess in some cases we might be able to fast forward a < byte_start, but we don't have that raw byte access tech yet # and if byte_start > num_bytes_read, then lmao raise HydrusExceptions.NetworkException( 'This server delivered an undesired Range response! 
We asked for Range "{}" and got Content-Range "{}" back!'.format( response.request.headers[ 'range' ], response.headers[ 'content-range' ] ) ) except: pass if size != '*': if self._num_bytes_to_read is None: try: num_bytes = int( size ) self._num_bytes_to_read = num_bytes except: pass starting_num_bytes_read = self._num_bytes_read for chunk in response.iter_content( chunk_size = 65536 ): if self._IsCancelled(): raise HydrusExceptions.CancelledException() stream_dest.write( chunk ) # get the raw bytes read, not the length of the chunk, as there may be transfer-encoding (chunked, gzip etc...) total_bytes_read_in_this_response = response.raw.tell() if total_bytes_read_in_this_response == 0: # this seems to occur when the response is Transfer-Encoding: chunked (note, no Content-Length) # there's no great way to track raw bytes read in this case. the iter_content chunk can be unzipped from that # nonetheless, requests does raise ChunkedEncodingError if it stops early, so not a huge deal to miss here, just slightly off bandwidth tracking self._num_bytes_read_is_accurate = False chunk_num_bytes = len( chunk ) self._num_bytes_read += chunk_num_bytes else: previous_num_bytes_read = self._num_bytes_read self._num_bytes_read = starting_num_bytes_read + total_bytes_read_in_this_response chunk_num_bytes = self._num_bytes_read - previous_num_bytes_read with self._lock: if self._num_bytes_to_read is not None and self._num_bytes_read_is_accurate and self._num_bytes_read > self._num_bytes_to_read: raise HydrusExceptions.NetworkException( 'Too much data: Was expecting {} but server continued responding!'.format( HydrusData.ToHumanBytes( self._num_bytes_to_read ) ) ) if self._max_allowed_bytes is not None and self._num_bytes_read > self._max_allowed_bytes: raise HydrusExceptions.NetworkException( 'The url exceeded the max network size for this type of job, which is {}!'.format( HydrusData.ToHumanBytes( self._max_allowed_bytes ) ) ) if self._file_import_options is not None: 
is_complete_file_size = False self._file_import_options.CheckNetworkDownload( self._response_mime, self._num_bytes_read, is_complete_file_size ) self._ReportDataUsed( chunk_num_bytes ) self._WaitOnOngoingBandwidth() if HG.started_shutdown: raise HydrusExceptions.ShutdownException() # stick with GET for now. if there is a complex way to range-chunk a POST, we'll deal with it then, but I don't want to spam file uploads to IQDB by accident etc... download_is_definitely_incomplete = self._method == 'GET' and self._num_bytes_to_read is not None and self._num_bytes_read_is_accurate and self._num_bytes_read < self._num_bytes_to_read we_read_some_data = self._num_bytes_read > starting_num_bytes_read if download_is_definitely_incomplete and not we_read_some_data: self._number_of_concurrent_empty_chunks += 1 if self._number_of_concurrent_empty_chunks > 2: raise HydrusExceptions.NetworkException( 'The server appeared to want to send this URL in ranged chunks, but this chunk was empty!' ) more_to_download = True else: self._number_of_concurrent_empty_chunks = 0 more_to_download = we_read_some_data and download_is_definitely_incomplete if not more_to_download: if self._file_import_options is not None: is_complete_file_size = True self._file_import_options.CheckNetworkDownload( self._response_mime, self._num_bytes_read, is_complete_file_size ) return more_to_download def _ReportDataUsed( self, num_bytes ): self._bandwidth_tracker.ReportDataUsed( num_bytes ) self.engine.bandwidth_manager.ReportDataUsed( self._network_contexts, num_bytes ) def _ResetForAnotherAttempt( self ): self._current_request_attempt_number += 1 self._content_type = None self._response_mime = None self._encoding = 'utf-8' self._encoding_confirmed = False self._stream_io = io.BytesIO() self._num_bytes_read = 0 self._num_bytes_to_read = 1 self._num_bytes_read_is_accurate = True self._number_of_concurrent_empty_chunks = 0 def _ResetForAnotherConnectionAttempt( self ): self._ResetForAnotherAttempt() 
self._current_connection_attempt_number += 1 self._current_request_attempt_number = 1 def _SendRequestAndGetResponse( self ) -> requests.Response: with self._lock: ncs = list( self._network_contexts ) headers = self.engine.domain_manager.GetHeaders( ncs ) with self._lock: method = self._method url = self._url data = self._body files = self._files if self.IS_HYDRUS_SERVICE or self.IS_IPFS_SERVICE: headers[ 'User-Agent' ] = 'hydrus client/' + str( HC.NETWORK_VERSION ) referral_url = self.engine.domain_manager.GetReferralURL( url, self._referral_url ) url_class = self.engine.domain_manager.GetURLClass( url ) if url_class is not None: headers.update( url_class.GetHeaderOverrides() ) if url_class is None or url_class.GetURLType() in ( HC.URL_TYPE_FILE, HC.URL_TYPE_UNKNOWN ): headers[ 'Range' ] = 'bytes={}-'.format( self._num_bytes_read ) if HG.network_report_mode: HydrusData.ShowText( 'Network Jobs Referral URLs for {}:{}Given: {}{}Used: {}'.format( url, os.linesep, self._referral_url, os.linesep, referral_url ) ) if referral_url is not None: try: referral_url.encode( 'latin-1' ) except UnicodeEncodeError: # quick and dirty way to quote this url when it comes here with full unicode chars. 
not perfect, but does the job referral_url = urllib.parse.quote( referral_url, "!#$%&'()*+,/:;=?@[]~" ) if HG.network_report_mode: HydrusData.ShowText( 'Network Jobs Quoted Referral URL for {}:{}{}'.format( url, os.linesep, referral_url ) ) headers[ 'referer' ] = referral_url for ( key, value ) in self._additional_headers.items(): headers[ key ] = value if self._num_bytes_read == 0: self._status_text = 'sending request\u2026' snc = self._session_network_context session = self.engine.session_manager.GetSession( snc ) ( connect_timeout, read_timeout ) = self._GetTimeouts() response = session.request( method, url, data = data, files = files, headers = headers, stream = True, timeout = ( connect_timeout, read_timeout ) ) with self._lock: if self._body is not None: self._ReportDataUsed( len( self._body ) ) return response def _SetCancelled( self ): self._is_cancelled = True self._SetDone() def _SetError( self, e, error ): self._error_exception = e self._error_text = error if HG.network_report_mode: HydrusData.ShowText( 'Network error should follow:' ) HydrusData.ShowException( e ) HydrusData.ShowText( error ) self._SetDone() def _SetDone( self ): self._is_done = True self._is_done_event.set() def _Sleep( self, seconds_float ): self._wake_time_float = HydrusData.GetNowFloat() + seconds_float def _SolveCloudFlare( self, response ): if CLOUDSCRAPER_OK: try: # cloudscraper refactored a bit around 1.2.60, so we now have some different paths to what we want old_class_object = None new_class_instance = None if hasattr( cloudscraper, 'CloudScraper' ): old_class_object = getattr( cloudscraper, 'CloudScraper' ) if hasattr( cloudscraper, 'cloudflare' ): m = getattr( cloudscraper, 'cloudflare' ) if hasattr( m, 'Cloudflare' ): new_class_object = getattr( m, 'Cloudflare' ) cs = cloudscraper.CloudScraper() new_class_instance = new_class_object( cs ) possible_paths = [ ( old_class_object, 'is_Firewall_Blocked' ), ( new_class_instance, 'is_Firewall_Blocked' ) ] is_firewall = False for ( 
m, method_name ) in possible_paths: if m is None: continue if hasattr( m, method_name ): is_firewall = getattr( m, method_name )( response ) if is_firewall: break possible_paths = [ ( old_class_object, 'is_reCaptcha_Challenge' ), ( old_class_object, 'is_Captcha_Challenge' ), ( new_class_instance, 'is_Captcha_Challenge' ) ] is_captcha = False for ( m, method_name ) in possible_paths: if m is None: continue if hasattr( m, method_name ): is_captcha = getattr( m, method_name )( response ) if is_captcha: break possible_paths = [ ( old_class_object, 'is_IUAM_Challenge' ), ( new_class_instance, 'is_IUAM_Challenge' ), ( new_class_instance, 'is_New_IUAM_Challenge' ) ] is_iuam = False for ( m, method_name ) in possible_paths: if m is None: continue if hasattr( m, method_name ): is_iuam = getattr( m, method_name )( response ) if is_iuam: break is_attemptable = is_captcha or is_iuam except Exception as e: HydrusData.Print( 'cloudflarescraper had an error looking at "{}" response: {}'.format( self._url, str( e ) ) ) HydrusData.PrintException( e ) return if is_firewall: raise HydrusExceptions.CloudFlareException( 'It looks like the site has Firewall-Blocked your IP or IP range with CloudFlare.' ) if is_attemptable: try: with self._lock: ncs = list( self._network_contexts ) snc = self._session_network_context headers = self.engine.domain_manager.GetHeaders( ncs ) if 'User-Agent' not in headers: raise HydrusExceptions.CloudFlareException( 'No User-Agent set for hydrus!' 
) user_agent = headers[ 'User-Agent' ] ( cf_tokens, user_agent ) = cloudscraper.get_tokens( self._url, browser = { 'custom' : user_agent } ) session = self.engine.session_manager.GetSession( snc ) cf_cookies = [ cookie for cookie in session.cookies if cookie.name.startswith( '__cf' ) ] for cookie in cf_cookies: session.cookies.clear( cookie.domain, cookie.path, cookie.name ) domain = '.{}'.format( ClientNetworkingFunctions.ConvertURLIntoSecondLevelDomain( self._url ) ) path = '/' expires = HydrusData.GetNow() + 30 * 86400 secure = True rest = { 'HttpOnly' : None, 'SameSite' : 'None' } for ( name, value ) in cf_tokens.items(): ClientNetworkingFunctions.AddCookieToSession( session, name, value, domain, path, expires, secure = secure, rest = rest ) self.engine.session_manager.SetSessionDirty( snc ) except Exception as e: raise HydrusExceptions.CloudFlareException( 'This looks like an unsolvable CloudFlare captcha! Best solution we know of is to copy cookies and User-Agent header from your web browser to hydrus!' ) raise HydrusExceptions.ShouldReattemptNetworkException( 'CloudFlare needed solving.' 
) def _WaitOnConnectionError( self, status_text: str ): connection_error_wait_time = HG.client_controller.new_options.GetInteger( 'connection_error_wait_time' ) self._connection_error_wake_time = HydrusData.GetNow() + ( ( self._current_connection_attempt_number - 1 ) * connection_error_wait_time ) while not HydrusData.TimeHasPassed( self._connection_error_wake_time ) and not self._IsCancelled(): with self._lock: self._status_text = '{} - retrying in {}'.format( status_text, ClientData.TimestampToPrettyTimeDelta( self._connection_error_wake_time ) ) time.sleep( 1 ) self._WaitOnNetworkTrafficPaused( status_text ) def _WaitOnNetworkTrafficPaused( self, status_text: str ): while HG.client_controller.new_options.GetBoolean( 'pause_all_new_network_traffic' ) and not self._IsCancelled(): with self._lock: self._status_text = '{} - now waiting because all network traffic is paused'.format( status_text ) time.sleep( 1 ) def _WaitOnOngoingBandwidth( self ): while not self._OngoingBandwidthOK() and not self._IsCancelled(): time.sleep( 0.1 ) def _WaitOnServersideBandwidth( self, status_text: str ): # 429 or 509 response from server. 
basically means 'I'm under big load mate' # a future version of this could def talk to domain manager and add a temp delay so other network jobs can be informed serverside_bandwidth_wait_time = HG.client_controller.new_options.GetInteger( 'serverside_bandwidth_wait_time' ) problem_rating = ( self._current_connection_attempt_number + self._current_request_attempt_number ) - 1 self._serverside_bandwidth_wake_time = HydrusData.GetNow() + ( problem_rating * serverside_bandwidth_wait_time ) while not HydrusData.TimeHasPassed( self._serverside_bandwidth_wake_time ) and not self._IsCancelled(): with self._lock: self._status_text = '{} - retrying in {}'.format( status_text, ClientData.TimestampToPrettyTimeDelta( self._serverside_bandwidth_wake_time ) ) time.sleep( 1 ) self._WaitOnNetworkTrafficPaused( status_text ) def AddAdditionalHeader( self, key, value ): with self._lock: self._additional_headers[ key ] = value def BandwidthOK( self ): with self._lock: if self._ObeysBandwidth(): return self.engine.bandwidth_manager.CanDoWork( self._network_contexts ) else: return True def Cancel( self, status_text = None ): with self._lock: if status_text is None: status_text = 'cancelled!' self._status_text = status_text self._SetCancelled() def CanValidateInPopup( self ): with self._lock: return self.engine.domain_manager.CanValidateInPopup( self._network_contexts ) def CheckCanLogin( self ): with self._lock: if self._for_login: raise HydrusExceptions.ValidationException( 'Login jobs should not be asked if they can login!' 
) else: return self.engine.login_manager.CheckCanLogin( self._login_network_context ) def CurrentlyWaitingOnConnectionError( self ): with self._lock: return not HydrusData.TimeHasPassed( self._connection_error_wake_time ) def CurrentlyWaitingOnServersideBandwidth( self ): with self._lock: return not HydrusData.TimeHasPassed( self._serverside_bandwidth_wake_time ) def DomainOK( self ): with self._lock: if self._this_is_a_one_shot_request: return True domain_ok = self.engine.domain_manager.DomainOK( self._url ) if not domain_ok: self._status_text = 'This domain has had several serious errors recently. Waiting a bit.' self._Sleep( 10 ) return domain_ok def GenerateLoginProcess( self ): with self._lock: if self._for_login: raise Exception( 'Login jobs should not be asked to generate login processes!' ) else: return self.engine.login_manager.GenerateLoginProcess( self._login_network_context ) def GenerateValidationPopupProcess( self ): with self._lock: return self.engine.domain_manager.GenerateValidationPopupProcess( self._network_contexts ) def GetActualFetchedURL( self ): with self._lock: return self._actual_fetched_url def GetContentBytes( self ): with self._lock: self._stream_io.seek( 0 ) return self._stream_io.read() def GetContentText( self ): data = self.GetContentBytes() ( text, self._encoding ) = HydrusText.NonFailingUnicodeDecode( data, self._encoding ) return text def GetContentType( self ): with self._lock: return self._content_type def GetCreationTime( self ): with self._lock: return self._creation_time def GetDomain( self ): with self._lock: return self._domain def GetErrorException( self ): with self._lock: return self._error_exception def GetErrorText( self ): with self._lock: return self._error_text def GetLastModifiedTime( self ) -> typing.Optional[ int ]: with self._lock: return self._response_last_modified def GetLoginNetworkContext( self ): with self._lock: return self._login_network_context def GetNetworkContexts( self ): with self._lock: return 
list( self._network_contexts ) def GetSecondLevelDomain( self ): with self._lock: return self._second_level_domain def GetSession( self ): with self._lock: snc = self._session_network_context session = self.engine.session_manager.GetSession( snc ) return session def GetStatus( self ): with self._lock: return ( self._status_text, self._bandwidth_tracker.GetUsage( HC.BANDWIDTH_TYPE_DATA, 1 ), self._num_bytes_read, self._num_bytes_to_read ) def GetTotalDataUsed( self ): with self._lock: return self._bandwidth_tracker.GetUsage( HC.BANDWIDTH_TYPE_DATA, None ) def GetURL( self ): with self._lock: return self._url def HasError( self ): with self._lock: return self._error_exception is not None def IsAsleep( self ): with self._lock: return not HydrusData.TimeHasPassedFloat( self._wake_time_float ) def IsCancelled( self ): with self._lock: return self._IsCancelled() def IsCloudFlareCache( self ): with self._lock: return self._response_server_header is not None and self._response_server_header == 'cloudflare' def IsDone( self ): with self._lock: return self._IsDone() def IsHydrusJob( self ): with self._lock: return False def IsValid( self ): with self._lock: return self.engine.domain_manager.IsValid( self._network_contexts ) def NeedsLogin( self ): with self._lock: if self._for_login: return False else: return self.engine.login_manager.NeedsLogin( self._login_network_context ) def NoEngineYet( self ): return self.engine is None def ObeysBandwidth( self ): return self._ObeysBandwidth() def OnlyTryConnectionOnce( self ): self._this_is_a_one_shot_request = True def OverrideBandwidth( self, delay = None ): with self._lock: if delay is None: self._bandwidth_manual_override = True self._wake_time_float = 0.0 else: self._bandwidth_manual_override_delayed_timestamp = HydrusData.GetNow() + delay self._wake_time_float = min( self._wake_time_float, self._bandwidth_manual_override_delayed_timestamp + 1.0 ) def OverrideConnectionErrorWait( self ): with self._lock: 
self._connection_error_wake_time = 0 def OverrideServersideBandwidthWait( self ): with self._lock: self._serverside_bandwidth_wake_time = 0 def OverrideToken( self ): with self._lock: self._gallery_token_consumed = True self._wake_time_float = 0.0 def ScrubDomainErrors( self ): with self._lock: self.engine.domain_manager.ScrubDomainErrors( self._url ) self._wake_time_float = 0.0 def SetError( self, e: Exception, error: str ): with self._lock: self._SetError( e, error ) def SetFiles( self, files ): with self._lock: self._files = files def SetFileImportOptions( self, file_import_options ): with self._lock: self._file_import_options = file_import_options def SetForLogin( self, for_login: bool ): with self._lock: self._for_login = for_login def SetGalleryToken( self, token_name: str ): with self._lock: self._gallery_token_name = token_name def SetStatus( self, text: str ): with self._lock: self._status_text = text def Sleep( self, seconds ): with self._lock: self._Sleep( seconds ) def Start( self ): try: with self._lock: self._is_started = True self._status_text = 'job started' request_completed = False while not request_completed: if self._IsCancelled(): return response = None try: response = self._SendRequestAndGetResponse() # I think tbh I would rather tell requests not to do 3XX, which is possible with allow_redirects = False on request, and then just raise various 3XX exceptions with url info, so I can requeue easier and keep a record # figuring out correct new url seems a laugh, requests has slight helpers, but lots of exceptions # SessionRedirectMixin here https://requests.readthedocs.io/en/latest/_modules/requests/sessions/ # but this will do as a patch for now self._actual_fetched_url = response.url if self._actual_fetched_url != self._url and HG.network_report_mode: HydrusData.ShowText( 'Network Jobs Redirect: {} -> {}'.format( self._url, self._actual_fetched_url ) ) self._ParseFirstResponseHeaders( response ) if response.ok: with self._lock: 
self._status_text = 'downloading\u2026' if self._temp_path is None: stream_dest = self._stream_io else: stream_dest = open( self._temp_path, 'wb' ) try: more_to_download = True while more_to_download: more_to_download = self._ReadResponse( response, stream_dest ) if more_to_download: with self._lock: self._status_text = 'downloading next part\u2026' # this will magically have new Range header response = self._SendRequestAndGetResponse() if not response.ok: raise HydrusExceptions.NetworkException( 'Ranged response failed {}'.format( response.status_code ) ) finally: if self._temp_path is not None: stream_dest.close() with self._lock: # we are complete here and worked ok self._GenerateModifiedDate( response ) if 'Server' in response.headers: self._response_server_header = response.headers[ 'Server' ] self._status_text = 'done!' else: with self._lock: self._status_text = str( response.status_code ) + ' - ' + str( response.reason ) # it is important we do this before ReadResponse, as the CF test needs r.text, which is nullified if we first access with iter_content if not self._we_tried_cloudflare_once: self._we_tried_cloudflare_once = True self._SolveCloudFlare( response ) # don't care about 'more_to_download' here. lmao if some server ever tried to pull it off anyway self._ReadResponse( response, self._stream_io ) data = self.GetContentBytes() with self._lock: ( e, error_text ) = ConvertStatusCodeAndDataIntoExceptionInfo( response.status_code, data, self.IS_HYDRUS_SERVICE ) if isinstance( e, ( HydrusExceptions.BandwidthException, HydrusExceptions.ShouldReattemptNetworkException ) ): raise e self._SetError( e, error_text ) request_completed = True except HydrusExceptions.CancelledException: with self._lock: self._status_text = 'Cancelled!' 
return except HydrusExceptions.BandwidthException as e: self._ResetForAnotherAttempt() if self._CanReattemptRequest(): self.engine.domain_manager.ReportNetworkInfrastructureError( self._url ) else: raise HydrusExceptions.BandwidthException( 'Server reported very limited bandwidth: ' + str( e ) ) self._WaitOnServersideBandwidth( 'server reported limited bandwidth' ) except HydrusExceptions.ShouldReattemptNetworkException as e: self._ResetForAnotherAttempt() if not self._CanReattemptRequest(): raise HydrusExceptions.NetworkInfrastructureException( 'Ran out of reattempts on this error: ' + str( e ) ) self._WaitOnConnectionError( str( e ) ) except requests.exceptions.ChunkedEncodingError: self._ResetForAnotherAttempt() if not self._CanReattemptRequest(): raise HydrusExceptions.StreamTimeoutException( 'Unable to complete request--it broke mid-way!' ) self._WaitOnConnectionError( 'connection broke mid-request' ) except requests.exceptions.SSLError as e: # note a requests SSLError is a ConnectionError, so careful about catching order here self.engine.domain_manager.ReportNetworkInfrastructureError( self._url ) raise HydrusExceptions.ConnectionException( 'Problem with SSL: {}'.format( str( e ) ) ) except ( requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout ): self._ResetForAnotherConnectionAttempt() if self._CanReattemptConnection(): self.engine.domain_manager.ReportNetworkInfrastructureError( self._url ) else: raise HydrusExceptions.ConnectionException( 'Could not connect!' ) self._WaitOnConnectionError( 'connection failed' ) except requests.exceptions.ReadTimeout: self._ResetForAnotherAttempt() if not self._CanReattemptRequest(): raise HydrusExceptions.StreamTimeoutException( 'Connection successful, but reading response timed out!' ) self._WaitOnConnectionError( 'read timed out' ) except Exception as e: if '\'Retry\' has no attribute' in str( e ): # this is that weird requests 2.25.x(?) 
urllib3 maybe thread safety error # we'll just try and pause a bit I guess! self._ResetForAnotherConnectionAttempt() if self._CanReattemptConnection(): self.engine.domain_manager.ReportNetworkInfrastructureError( self._url ) else: raise HydrusExceptions.ConnectionException( 'Could not connect!' ) self._WaitOnConnectionError( 'connection failed, and could not recover neatly' ) else: raise finally: with self._lock: snc = self._session_network_context self.engine.session_manager.SetSessionDirty( snc ) if response is not None: # if full data was not read, the response will hang around in connection pool longer than we want # so just an explicit close here response.close() except Exception as e: with self._lock: trace = traceback.format_exc() if not isinstance( e, ( HydrusExceptions.NetworkInfrastructureException, HydrusExceptions.StreamTimeoutException, HydrusExceptions.FileImportRulesException ) ): HydrusData.Print( trace ) if isinstance( e, HydrusExceptions.NetworkInfrastructureException ): self.engine.domain_manager.ReportNetworkInfrastructureError( self._url ) self._status_text = 'Error: ' + str( e ) self._SetError( e, trace ) finally: with self._lock: self._SetDone() def TokensOK( self ) -> bool: with self._lock: need_token = self._gallery_token_name is not None and not self._gallery_token_consumed sld = self._second_level_domain gtn = self._gallery_token_name if need_token: ( consumed, next_timestamp ) = self.engine.bandwidth_manager.TryToConsumeAGalleryToken( sld, gtn ) with self._lock: if consumed: self._status_text = 'starting soon' self._gallery_token_consumed = True else: if HydrusData.TimeHasPassed( self._last_gallery_token_estimate ) and not HydrusData.TimeHasPassed( self._last_gallery_token_estimate + 3 ): self._status_text = 'a different {} got the chance to work'.format( self._gallery_token_name ) else: self._status_text = 'waiting to start: {}'.format( ClientData.TimestampToPrettyTimeDelta( next_timestamp, just_now_threshold = 2, just_now_string = 
    def TryToStartBandwidth( self ):
        """
        Ask the engine's bandwidth manager whether this job may start its request now.
        
        If the job does not obey bandwidth rules, the request is simply reported as used
        (to both this job's tracker and the bandwidth manager) and True is returned.
        
        Otherwise the manager's TryToStartRequest result is returned. When that result is
        falsy, the status text is updated with a waiting estimate and self._Sleep is called
        with a value that depends on how long the wait is expected to be.
        
        NOTE(review): the block nesting below was reconstructed from whitespace-collapsed
        source. In particular, the placement of the 'waiting_str +=' suffix line (applied to
        both the override and the normal branch) is an assumption -- confirm against the
        canonical file.
        """
        
        with self._lock:
            
            if self._ObeysBandwidth():
                
                result = self.engine.bandwidth_manager.TryToStartRequest( self._network_contexts )
                
                if result:
                    
                    self._bandwidth_tracker.ReportRequestUsed()
                    
                else:
                    
                    # which context is holding us up, and for how long
                    ( bandwidth_waiting_duration, bandwidth_network_context ) = self.engine.bandwidth_manager.GetWaitingEstimateAndContext( self._network_contexts )
                    
                    will_override = self._bandwidth_manual_override_delayed_timestamp is not None
                    
                    override_coming_first = False
                    
                    if will_override:
                        
                        # a delayed manual override is pending; see whether it lands before the bandwidth frees up
                        override_waiting_duration = self._bandwidth_manual_override_delayed_timestamp - HydrusData.GetNow()
                        
                        override_coming_first = override_waiting_duration < bandwidth_waiting_duration
                        
                    
                    just_now_threshold = 2
                    
                    if override_coming_first:
                        
                        waiting_duration = override_waiting_duration
                        
                        waiting_str = 'overriding bandwidth ' + ClientData.TimestampToPrettyTimeDelta( self._bandwidth_manual_override_delayed_timestamp, just_now_string = 'imminently', just_now_threshold = just_now_threshold )
                        
                    else:
                        
                        waiting_duration = bandwidth_waiting_duration
                        
                        bandwidth_time_estimate = HydrusData.GetNow() + waiting_duration
                        
                        # presumably: the last estimate elapsed within the last three seconds, yet we still could not start
                        if HydrusData.TimeHasPassed( self._last_bandwidth_time_estimate ) and not HydrusData.TimeHasPassed( self._last_bandwidth_time_estimate + 3 ):
                            
                            waiting_str = 'a different network job got the bandwidth'
                            
                        else:
                            
                            waiting_str = 'bandwidth free ' + ClientData.TimestampToPrettyTimeDelta( bandwidth_time_estimate, just_now_string = 'imminently', just_now_threshold = just_now_threshold )
                            
                            self._last_bandwidth_time_estimate = bandwidth_time_estimate
                            
                        
                    
                    # append an ellipsis and the human-readable name of the blocking context
                    waiting_str += '\u2026 (' + bandwidth_network_context.ToHumanString() + ')'
                    
                    self._status_text = waiting_str
                    
                    # longer expected waits call _Sleep with larger values; waits of ten or less do not call it at all
                    if waiting_duration > 1200:
                        
                        self._Sleep( 30 )
                        
                    elif waiting_duration > 120:
                        
                        self._Sleep( 10 )
                        
                    elif waiting_duration > 10:
                        
                        self._Sleep( 0.8 )
                        
                    
                
                return result
                
            else:
                
                # not bandwidth-limited: just record the usage in both places and go ahead
                self._bandwidth_tracker.ReportRequestUsed()
                
                self.engine.bandwidth_manager.ReportRequestUsed( self._network_contexts )
                
                return True
                
            
        
    
self.engine.bandwidth_manager.ReportRequestUsed( self._network_contexts ) return True def WaitUntilDone( self ): while True: if self.IsDone(): break self._is_done_event.wait( 5 ) with self._lock: if HG.started_shutdown or HydrusThreading.IsThreadShuttingDown(): raise HydrusExceptions.ShutdownException() elif self._error_exception is not None: if isinstance( self._error_exception, Exception ): raise self._error_exception else: raise Exception( 'Problem in network error handling.' ) elif self._IsCancelled(): if self._method == 'POST': message = 'Upload cancelled: ' + self._status_text else: message = 'Download cancelled: ' + self._status_text raise HydrusExceptions.CancelledException( message ) def WillingToWaitOnInvalidLogin( self ) -> bool: return self.WILLING_TO_WAIT_ON_INVALID_LOGIN class NetworkJobDownloader( NetworkJob ): def __init__( self, downloader_page_key, method, url, body = None, referral_url = None, temp_path = None ): self._downloader_page_key = downloader_page_key NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path ) def _GenerateNetworkContexts( self ): network_contexts = NetworkJob._GenerateNetworkContexts( self ) network_contexts.append( ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOWNLOADER_PAGE, self._downloader_page_key ) ) return network_contexts class NetworkJobSubscription( NetworkJob ): WILLING_TO_WAIT_ON_INVALID_LOGIN = False def __init__( self, subscription_key, method, url, body = None, referral_url = None, temp_path = None ): self._subscription_key = subscription_key NetworkJob.__init__( self, method, url, body = body, referral_url = referral_url, temp_path = temp_path ) def _GenerateNetworkContexts( self ): network_contexts = NetworkJob._GenerateNetworkContexts( self ) network_contexts.append( ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_SUBSCRIPTION, self._subscription_key ) ) return network_contexts def CheckHydrusVersion( service_type, response ): 
def CheckHydrusVersion( service_type, response ):
    """
    Verify that a response came from the expected kind of hydrus service at our network version.
    
    The 'server' header is checked first, then 'hydrus-server'; the first one that contains
    the service's name string is used. The network version is the integer after the first
    '/' in that header, ignoring anything after a following space.
    
    :param service_type: key into HC.service_string_lookup naming the expected service.
    :param response: response object whose .headers mapping is inspected.
    :raises HydrusExceptions.WrongServiceTypeException: if neither header names the expected service.
    :raises HydrusExceptions.NetworkVersionException: if the version cannot be parsed, or it
        differs from HC.NETWORK_VERSION.
    """
    
    service_string = HC.service_string_lookup[ service_type ]
    
    headers = response.headers
    
    server_header = None
    
    for header_name in ( 'server', 'hydrus-server' ):
        
        if header_name in headers and service_string in headers[ header_name ]:
            
            server_header = headers[ header_name ]
            
            break
            
        
    
    if server_header is None:
        
        raise HydrusExceptions.WrongServiceTypeException( 'Target was not a ' + service_string + '!' )
        
    
    # might be "hydrus tag repository/17" or "hydrus tag repository/17 (498)" kind of thing
    
    # partition never fails; a header with no '/' just yields an empty version string
    ( _, _, version_text ) = server_header.partition( '/' )
    
    version_text = version_text.split( ' ', 1 )[0]
    
    try:
        
        network_version = int( version_text )
        
    except ValueError:
        
        # previously a malformed header escaped as a bare ValueError with an unhelpful message
        raise HydrusExceptions.NetworkVersionException( 'Could not parse a network version from the server header "{}"!'.format( server_header ) )
        
    
    if network_version != HC.NETWORK_VERSION:
        
        if network_version > HC.NETWORK_VERSION:
            
            message = 'Your client is out of date; please download the latest release.'
            
        else:
            
            message = 'The server is out of date; please ask its admin to update to the latest release.'
            
        
        raise HydrusExceptions.NetworkVersionException( 'Network version mismatch! The server\'s network version was ' + str( network_version ) + ', whereas your client\'s is ' + str( HC.NETWORK_VERSION ) + '! ' + message )
        
    
class NetworkJobHydrus( NetworkJob ):
    """A NetworkJob aimed at a hydrus service, identified by its service key."""
    
    WILLING_TO_WAIT_ON_INVALID_LOGIN = False
    IS_HYDRUS_SERVICE = True
    
    def __init__( self, service_key, method, url, body = None, referral_url = None, temp_path = None ):
        
        # stored before the base initialiser runs, as in the original ordering
        self._service_key = service_key
        
        super().__init__( method, url, body = body, referral_url = referral_url, temp_path = temp_path )
        
    
    def _GenerateNetworkContexts( self ):
        """Return the global context followed by this service's hydrus context."""
        
        return [
            ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT,
            ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_HYDRUS, self._service_key )
        ]
        
    
    def _GenerateSpecificNetworkContexts( self ):
        """Return a ( session context, login context ) pair; both are the same hydrus context object."""
        
        # we store cookies on and login to the same hydrus-specific context
        
        hydrus_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_HYDRUS, self._service_key )
        
        return ( hydrus_context, hydrus_context )
        
    
    def _ReportDataUsed( self, num_bytes ):
        """Report the bytes to the service's account when the service is restricted, then to the base job."""
        
        service = self.engine.controller.services_manager.GetService( self._service_key )
        
        if service.GetServiceType() in HC.RESTRICTED_SERVICES:
            
            service.GetAccount().ReportDataUsed( num_bytes )
            
        
        super()._ReportDataUsed( num_bytes )
        
    
    def _SendRequestAndGetResponse( self ) -> requests.Response:
        """
        Send the request through the base job, with account bookkeeping and a version check
        for restricted services.
        
        :return: the response from the base implementation.
        """
        
        service = self.engine.controller.services_manager.GetService( self._service_key )
        
        service_type = service.GetServiceType()
        
        if service_type in HC.RESTRICTED_SERVICES:
            
            # the request is counted against the account before it is sent
            service.GetAccount().ReportRequestUsed()
            
        
        response = super()._SendRequestAndGetResponse()
        
        if response.ok and service_type in HC.RESTRICTED_SERVICES:
            
            CheckHydrusVersion( service_type, response )
            
        
        return response
        
    
    def IsHydrusJob( self ):
        """Return True, taking the job lock while doing so."""
        
        with self._lock:
            
            return True
            
        
    

class NetworkJobIPFS( NetworkJob ):
    """A POST-only NetworkJob flagged as an IPFS service job."""
    
    IS_IPFS_SERVICE = True
    
    def __init__( self, url, body = None, referral_url = None, temp_path = None ):
        
        super().__init__( 'POST', url, body = body, referral_url = referral_url, temp_path = temp_path )
        
        self.OnlyTryConnectionOnce()
        self.OverrideBandwidth()
        
    
    def _GetTimeouts( self ):
        """Return the base ( connect, read ) timeouts with the read timeout raised to at least 7200."""
        
        ( connect_timeout, base_read_timeout ) = super()._GetTimeouts()
        
        return ( connect_timeout, max( 7200, base_read_timeout ) )
        
    
class NetworkJobWatcherPage( NetworkJob ):
    """A NetworkJob that additionally carries a watcher-page network context."""
    
    def __init__( self, watcher_key, method, url, body = None, referral_url = None, temp_path = None ):
        
        # stored before the base initialiser runs, as in the original ordering
        self._watcher_key = watcher_key
        
        super().__init__( method, url, body = body, referral_url = referral_url, temp_path = temp_path )
        
    
    def _GenerateNetworkContexts( self ):
        """Return the base job's contexts with this watcher page's context appended."""
        
        contexts = super()._GenerateNetworkContexts()
        
        watcher_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_WATCHER_PAGE, self._watcher_key )
        
        contexts.append( watcher_context )
        
        return contexts
        
    