2022-03-16 02:52:54 +00:00
import calendar
2019-01-09 22:59:03 +00:00
import io
2020-05-20 21:36:02 +00:00
import os
2022-03-16 02:52:54 +00:00
import typing
2020-05-20 21:36:02 +00:00
import requests
import threading
import traceback
import time
import urllib
2020-04-22 21:00:35 +00:00
from hydrus . core import HydrusConstants as HC
from hydrus . core import HydrusData
from hydrus . core import HydrusExceptions
from hydrus . core import HydrusGlobals as HG
from hydrus . core import HydrusThreading
from hydrus . core import HydrusText
2021-04-07 21:26:45 +00:00
from hydrus . core . networking import HydrusNetworking
2018-04-18 22:10:15 +00:00
2020-07-29 20:52:44 +00:00
from hydrus . client import ClientConstants as CC
from hydrus . client import ClientData
2022-06-22 20:43:12 +00:00
from hydrus . client import ClientTime
2020-07-29 20:52:44 +00:00
from hydrus . client . networking import ClientNetworkingContexts
2022-01-05 22:15:56 +00:00
from hydrus . client . networking import ClientNetworkingFunctions
2020-07-29 20:52:44 +00:00
2020-04-16 00:09:42 +00:00
# Optional dependency probe: cloudscraper (and pyparsing, which it needs and
# which pyinstaller only bundles if it is imported explicitly somewhere).
# `except Exception` keeps the best-effort behaviour for any import-time
# failure without also swallowing KeyboardInterrupt/SystemExit.
try:
    import cloudscraper

    CLOUDSCRAPER_OK = True

    try:
        # help pyinstaller
        import pyparsing

        PYPARSING_OK = True
    except Exception:
        PYPARSING_OK = False
except Exception:
    CLOUDSCRAPER_OK = False
    PYPARSING_OK = False
2020-04-16 00:09:42 +00:00
2018-04-18 22:10:15 +00:00
def ConvertStatusCodeAndDataIntoExceptionInfo(status_code, data, is_hydrus_service=False):
    """Translate an HTTP error status and response body into an exception.

    status_code: integer HTTP status of the failed response.
    data: raw response body bytes; decoded best-effort as utf-8.
    is_hydrus_service: when True, a 503 maps to ServerBusyException rather
        than ShouldReattemptNetworkException.

    Returns (exception_instance, error_text). The exception is constructed,
    not raised. error_text is the decoded body, or a shortened notice when
    the body is long (the larger chunk then goes to the log).
    """
    (error_text, encoding) = HydrusText.NonFailingUnicodeDecode(data, 'utf-8')

    # 304 and 404 bodies are never dumped to the log, however long they are
    print_long_error_text = status_code not in (304, 404)

    status_to_eclass = {
        304: HydrusExceptions.NotModifiedException,
        400: HydrusExceptions.BadRequestException,
        401: HydrusExceptions.MissingCredentialsException,
        403: HydrusExceptions.InsufficientCredentialsException,
        404: HydrusExceptions.NotFoundException,
        406: HydrusExceptions.NotAcceptable,
        409: HydrusExceptions.ConflictException,
        416: HydrusExceptions.RangeNotSatisfiableException,
        419: HydrusExceptions.SessionException,
        426: HydrusExceptions.NetworkVersionException,
        429: HydrusExceptions.BandwidthException,
        509: HydrusExceptions.BandwidthException,
        502: HydrusExceptions.ShouldReattemptNetworkException,
    }

    if status_code in status_to_eclass:
        eclass = status_to_eclass[status_code]
    elif status_code == 503:
        if is_hydrus_service:
            eclass = HydrusExceptions.ServerBusyException
        else:
            eclass = HydrusExceptions.ShouldReattemptNetworkException
    elif status_code >= 500:
        eclass = HydrusExceptions.ServerException
    else:
        eclass = HydrusExceptions.NetworkException

    if len(error_text) > 1024 and print_long_error_text:
        large_chunk = error_text[:512 * 1024]
        smaller_chunk = large_chunk[:256]

        HydrusData.DebugPrint(large_chunk)

        error_text = 'The server\'s error text was too long to display. The first part follows, while a larger chunk has been written to the log.'
        error_text += os.linesep
        error_text += smaller_chunk

    e = eclass('{}: {}'.format(status_code, error_text))

    return (e, error_text)
class NetworkJob ( object ) :
2018-10-31 21:41:14 +00:00
WILLING_TO_WAIT_ON_INVALID_LOGIN = True
2018-04-18 22:10:15 +00:00
IS_HYDRUS_SERVICE = False
2020-05-13 19:03:16 +00:00
IS_IPFS_SERVICE = False
2018-04-18 22:10:15 +00:00
2022-01-05 22:15:56 +00:00
def __init__(self, method: str, url: str, body=None, referral_url=None, temp_path=None):
    """Set up the state for a single network request.

    method: HTTP verb, e.g. 'GET' or 'POST'.
    url: the URL to fetch; its domain and second-level domain are derived here.
    body: optional request body; a str is encoded to utf-8 bytes.
    referral_url: optional referral URL hint, resolved when the request is sent.
    temp_path: optional path; when it is None the response size is capped.
    """
    if isinstance(body, str):
        body = bytes(body, 'utf-8')

    self.engine = None

    self._lock = threading.Lock()

    self._method = method
    self._url = url

    self._current_connection_attempt_number = 1
    self._current_request_attempt_number = 1
    self._this_is_a_one_shot_request = False
    self._we_tried_cloudflare_once = False

    self._domain = ClientNetworkingFunctions.ConvertURLIntoDomain(self._url)
    self._second_level_domain = ClientNetworkingFunctions.ConvertURLIntoSecondLevelDomain(self._url)

    self._body = body
    self._referral_url = referral_url
    self._actual_fetched_url = self._url
    self._temp_path = temp_path

    self._response_server_header = None
    self._response_last_modified = None

    if self._temp_path is None:
        # 100MB HTML file lmao
        self._max_allowed_bytes = 104857600
    else:
        self._max_allowed_bytes = None

    self._files = None
    self._for_login = False

    self._additional_headers = {}

    self._creation_time = HydrusData.GetNow()

    self._bandwidth_tracker = HydrusNetworking.BandwidthTracker()

    self._connection_error_wake_time = 0
    self._serverside_bandwidth_wake_time = 0

    self._wake_time_float = 0.0

    self._content_type = None
    self._response_mime = None

    self._encoding = 'utf-8'
    self._encoding_confirmed = False

    self._stream_io = io.BytesIO()

    self._error_exception = Exception('Exception not initialised.')  # PyLint hint, wew
    self._error_exception = None
    self._error_text = None

    self._is_done_event = threading.Event()

    self._is_started = False
    self._is_done = False
    self._is_cancelled = False

    self._gallery_token_name = None
    self._gallery_token_consumed = False
    self._last_gallery_token_estimate = 0

    self._bandwidth_manual_override = False
    self._bandwidth_manual_override_delayed_timestamp = None

    self._last_bandwidth_time_estimate = 0

    self._last_time_ongoing_bandwidth_failed = 0

    self._status_text = 'initialising\u2026'
    self._num_bytes_read = 0
    # starts at 1 and is replaced once the response headers are parsed
    self._num_bytes_to_read = 1
    self._num_bytes_read_is_accurate = True
    self._number_of_concurrent_empty_chunks = 0

    self._file_import_options = None

    self._network_contexts = self._GenerateNetworkContexts()

    (self._session_network_context, self._login_network_context) = self._GenerateSpecificNetworkContexts()
2018-05-09 20:23:00 +00:00
def _CanReattemptConnection ( self ) :
2022-06-22 20:43:12 +00:00
if self . _this_is_a_one_shot_request :
return False
max_connection_attempts_allowed = HG . client_controller . new_options . GetInteger ( ' max_connection_attempts_allowed ' )
return self . _current_connection_attempt_number < = max_connection_attempts_allowed
2018-05-09 20:23:00 +00:00
2018-04-18 22:10:15 +00:00
def _CanReattemptRequest ( self ) :
2022-06-22 20:43:12 +00:00
if self . _this_is_a_one_shot_request :
return False
2018-04-18 22:10:15 +00:00
if self . _method == ' GET ' :
2022-06-22 20:43:12 +00:00
max_attempts_allowed = HG . client_controller . new_options . GetInteger ( ' max_request_attempts_allowed_get ' )
2018-04-18 22:10:15 +00:00
2022-06-22 20:43:12 +00:00
else :
2018-04-18 22:10:15 +00:00
max_attempts_allowed = 1
2022-06-22 20:43:12 +00:00
return self . _current_request_attempt_number < = max_attempts_allowed
2018-04-18 22:10:15 +00:00
2022-03-16 02:52:54 +00:00
def _GenerateModifiedDate(self, response: requests.Response):
    """Record the response's Last-Modified header as a timestamp, best-effort.

    Sets self._response_last_modified only when the header parses and
    ClientTime.TimestampIsSensible accepts the result. Any parse failure
    leaves the attribute untouched.
    """
    if 'Last-Modified' not in response.headers:
        return

    # Thu, 20 May 2010 07:00:23 GMT
    # these are always in GMT
    last_modified_string = response.headers['Last-Modified']

    if last_modified_string.endswith('GMT'):
        # drop the zone suffix and whatever whitespace precedes it
        last_modified_string = last_modified_string[:-3].rstrip()

    try:
        # NOTE(review): %a and %b are locale-dependent in strptime
        struct_time = time.strptime(last_modified_string, '%a, %d %b %Y %H:%M:%S')

        # the given struct is in GMT, so calendar.timegm is appropriate here
        last_modified_time = int(calendar.timegm(struct_time))

        if ClientTime.TimestampIsSensible(last_modified_time):
            self._response_last_modified = last_modified_time
    except Exception:
        # deliberate best-effort: a bad header must not fail the job
        pass
2018-04-18 22:10:15 +00:00
def _GenerateNetworkContexts(self):
    """Return the list of network contexts for this job.

    The list starts with the global context, followed by one domain context
    for each entry ConvertDomainIntoAllApplicableDomains yields for the
    job's domain.
    """
    network_contexts = [ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT]

    domains = ClientNetworkingFunctions.ConvertDomainIntoAllApplicableDomains(self._domain)

    network_contexts.extend(
        ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_DOMAIN, domain)
        for domain in domains
    )

    return network_contexts
def _GenerateSpecificNetworkContexts(self):
    """Return (session_network_context, login_network_context).

    The session context is keyed on the second-level domain and the login
    context on the full domain.
    """
    # we always store cookies in the larger session (even if the cookie itself refers to a subdomain in the session object)
    # but we can login to a specific subdomain
    session_network_context = ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_DOMAIN, self._second_level_domain)
    login_network_context = ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_DOMAIN, self._domain)

    return (session_network_context, login_network_context)
2020-08-27 01:00:42 +00:00
def _GetTimeouts(self):
    """Return (connect_timeout, read_timeout).

    The connect timeout comes from the 'network_timeout' option; the read
    timeout is six times that.
    """
    connect_timeout = HG.client_controller.new_options.GetInteger('network_timeout')

    read_timeout = connect_timeout * 6

    return (connect_timeout, read_timeout)
2018-04-18 22:10:15 +00:00
def _IsCancelled ( self ) :
if self . _is_cancelled :
return True
2022-01-19 21:28:59 +00:00
if HG . started_shutdown :
2018-04-18 22:10:15 +00:00
return True
return False
def _IsDone ( self ) :
if self . _is_done :
return True
2022-01-19 21:28:59 +00:00
if HG . started_shutdown or HydrusThreading . IsThreadShuttingDown ( ) :
2018-04-18 22:10:15 +00:00
return True
return False
def _ObeysBandwidth ( self ) :
if self . _bandwidth_manual_override :
return False
if self . _bandwidth_manual_override_delayed_timestamp is not None and HydrusData . TimeHasPassed ( self . _bandwidth_manual_override_delayed_timestamp ) :
return False
if self . _method == ' POST ' :
return False
if self . _for_login :
return False
return True
def _OngoingBandwidthOK(self):
    """Return True if the bandwidth manager allows this download to continue.

    A failure is remembered by its timestamp, so repeat calls with the same
    HydrusData.GetNow() value return False without asking the manager again.
    """
    now = HydrusData.GetNow()

    if now == self._last_time_ongoing_bandwidth_failed:
        # it won't have changed, so no point spending any cpu checking
        return False

    result = self.engine.bandwidth_manager.CanContinueDownload(self._network_contexts)

    if not result:
        self._last_time_ongoing_bandwidth_failed = now

    return result
2021-10-27 21:12:33 +00:00
def _ParseFirstResponseHeaders(self, response: requests.Response):
    """Read content type, length and encoding from the response headers.

    Updates _content_type, _response_mime, _num_bytes_to_read and _encoding
    under the job lock. For an ok response that declares a content-length,
    the size is checked against _max_allowed_bytes and the file import
    options.

    Raises HydrusExceptions.NetworkException if the declared length exceeds
    the cap. CheckNetworkDownload may raise too.
    """
    with self._lock:
        if 'Content-Type' in response.headers:
            self._content_type = response.headers['Content-Type']

        if self._content_type is not None and self._content_type in HC.mime_enum_lookup:
            self._response_mime = HC.mime_enum_lookup[self._content_type]
        else:
            self._response_mime = None

        has_content_length = 'content-length' in response.headers

        if has_content_length:
            self._num_bytes_to_read = int(response.headers['content-length'])
        else:
            self._num_bytes_to_read = None

        if response.encoding is not None:
            self._encoding = response.encoding

        # response.ok, i.e. we got what we expected, not some error
        if response.ok and has_content_length:
            if self._max_allowed_bytes is not None and self._num_bytes_to_read > self._max_allowed_bytes:
                raise HydrusExceptions.NetworkException('The url was apparently {} but the max network size for this type of job is {}!'.format(HydrusData.ToHumanBytes(self._num_bytes_to_read), HydrusData.ToHumanBytes(self._max_allowed_bytes)))

            if self._file_import_options is not None:
                is_complete_file_size = True

                self._file_import_options.CheckNetworkDownload(self._response_mime, self._num_bytes_to_read, is_complete_file_size)
2018-04-18 22:10:15 +00:00
2021-10-27 21:12:33 +00:00
def _ReadResponse(self, response: requests.Response, stream_dest):
    """Stream the response body into stream_dest, tracking bytes and limits.

    response: a streaming requests response.
    stream_dest: object with a write(bytes) method that receives each chunk.

    Returns True when more of the URL remains to be fetched with a further
    request, False when the download is complete as far as can be told.

    Raises HydrusExceptions.NetworkException for a mismatched Content-Range
    start, too much data, an exceeded size cap, or repeated empty chunks;
    CancelledException on cancel; ShutdownException on program shutdown.
    """
    if 'content-range' in response.headers:
        content_range = response.headers['content-range']

        # Content-Range: <unit> <range-start>-<range-end>/<size>
        # range and size can be *
        if content_range.startswith('bytes '):
            content_range = content_range[6:]

            if '/' in content_range:
                (byte_range, size) = content_range.split('/', 1)

                if byte_range != '*' and '-' in byte_range:
                    (byte_start, byte_end) = byte_range.split('-', 1)

                    # only the int() parse is best-effort. the mismatch error below must escape
                    # (a catch-all around both used to swallow it)
                    try:
                        byte_start = int(byte_start)
                    except ValueError:
                        byte_start = None

                    if byte_start is not None and byte_start != self._num_bytes_read:
                        # this server be crazy
                        # I guess in some cases we might be able to fast forward a < byte_start, but we don't have that raw byte access tech yet
                        # and if byte_start > num_bytes_read, then lmao
                        asked_range = response.request.headers.get('range')

                        raise HydrusExceptions.NetworkException('This server delivered an undesired Range response! We asked for Range "{}" and got Content-Range "{}" back!'.format(asked_range, response.headers['content-range']))

                if size != '*' and self._num_bytes_to_read is None:
                    try:
                        self._num_bytes_to_read = int(size)
                    except ValueError:
                        pass

    starting_num_bytes_read = self._num_bytes_read

    for chunk in response.iter_content(chunk_size=65536):
        if self._IsCancelled():
            raise HydrusExceptions.CancelledException()

        stream_dest.write(chunk)

        # get the raw bytes read, not the length of the chunk, as there may be transfer-encoding (chunked, gzip etc...)
        total_bytes_read_in_this_response = response.raw.tell()

        if total_bytes_read_in_this_response == 0:
            # this seems to occur when the response is Transfer-Encoding: chunked (note, no Content-Length)
            # there's no great way to track raw bytes read in this case. the iter_content chunk can be unzipped from that
            # nonetheless, requests does raise ChunkedEncodingError if it stops early, so not a huge deal to miss here, just slightly off bandwidth tracking
            self._num_bytes_read_is_accurate = False

            chunk_num_bytes = len(chunk)

            self._num_bytes_read += chunk_num_bytes
        else:
            previous_num_bytes_read = self._num_bytes_read

            self._num_bytes_read = starting_num_bytes_read + total_bytes_read_in_this_response

            chunk_num_bytes = self._num_bytes_read - previous_num_bytes_read

        with self._lock:
            if self._num_bytes_to_read is not None and self._num_bytes_read_is_accurate and self._num_bytes_read > self._num_bytes_to_read:
                raise HydrusExceptions.NetworkException('Too much data: Was expecting {} but server continued responding!'.format(HydrusData.ToHumanBytes(self._num_bytes_to_read)))

            if self._max_allowed_bytes is not None and self._num_bytes_read > self._max_allowed_bytes:
                raise HydrusExceptions.NetworkException('The url exceeded the max network size for this type of job, which is {}!'.format(HydrusData.ToHumanBytes(self._max_allowed_bytes)))

            if self._file_import_options is not None:
                is_complete_file_size = False

                self._file_import_options.CheckNetworkDownload(self._response_mime, self._num_bytes_read, is_complete_file_size)

        self._ReportDataUsed(chunk_num_bytes)

        self._WaitOnOngoingBandwidth()

        if HG.started_shutdown:
            raise HydrusExceptions.ShutdownException()

    # stick with GET for now. if there is a complex way to range-chunk a POST, we'll deal with it then, but I don't want to spam file uploads to IQDB by accident etc...
    download_is_definitely_incomplete = self._method == 'GET' and self._num_bytes_to_read is not None and self._num_bytes_read_is_accurate and self._num_bytes_read < self._num_bytes_to_read

    we_read_some_data = self._num_bytes_read > starting_num_bytes_read

    if download_is_definitely_incomplete and not we_read_some_data:
        self._number_of_concurrent_empty_chunks += 1

        if self._number_of_concurrent_empty_chunks > 2:
            raise HydrusExceptions.NetworkException('The server appeared to want to send this URL in ranged chunks, but this chunk was empty!')

        more_to_download = True
    else:
        self._number_of_concurrent_empty_chunks = 0

        more_to_download = we_read_some_data and download_is_definitely_incomplete

    if not more_to_download and self._file_import_options is not None:
        is_complete_file_size = True

        self._file_import_options.CheckNetworkDownload(self._response_mime, self._num_bytes_read, is_complete_file_size)

    return more_to_download
2018-04-18 22:10:15 +00:00
def _ReportDataUsed ( self , num_bytes ) :
self . _bandwidth_tracker . ReportDataUsed ( num_bytes )
self . engine . bandwidth_manager . ReportDataUsed ( self . _network_contexts , num_bytes )
2022-06-22 20:43:12 +00:00
def _ResetForAnotherAttempt ( self ) :
2021-10-27 21:12:33 +00:00
2022-06-22 20:43:12 +00:00
self . _current_request_attempt_number + = 1
2021-10-27 21:12:33 +00:00
self . _content_type = None
self . _response_mime = None
self . _encoding = ' utf-8 '
self . _encoding_confirmed = False
self . _stream_io = io . BytesIO ( )
self . _num_bytes_read = 0
self . _num_bytes_to_read = 1
self . _num_bytes_read_is_accurate = True
2021-11-03 20:49:56 +00:00
self . _number_of_concurrent_empty_chunks = 0
2021-10-27 21:12:33 +00:00
2022-06-22 20:43:12 +00:00
def _ResetForAnotherConnectionAttempt ( self ) :
self . _ResetForAnotherAttempt ( )
self . _current_connection_attempt_number + = 1
self . _current_request_attempt_number = 1
2021-09-22 21:12:34 +00:00
def _SendRequestAndGetResponse(self) -> requests.Response:
    """Build the headers, send the request, and return the streaming response.

    Headers come from the domain manager for this job's network contexts,
    then are adjusted with URL class overrides, a Range header for file or
    unknown URL types, a referer, and any additional headers set on the job.
    After the request returns, the length of the body (if any) is reported
    as data used.
    """
    # the module top only has `import urllib`; import the submodule explicitly
    import urllib.parse

    with self._lock:
        ncs = list(self._network_contexts)

    headers = self.engine.domain_manager.GetHeaders(ncs)

    with self._lock:
        method = self._method
        url = self._url
        data = self._body
        files = self._files

        if self.IS_HYDRUS_SERVICE or self.IS_IPFS_SERVICE:
            headers['User-Agent'] = 'hydrus client/' + str(HC.NETWORK_VERSION)

        referral_url = self.engine.domain_manager.GetReferralURL(url, self._referral_url)

        url_class = self.engine.domain_manager.GetURLClass(url)

        if url_class is not None:
            headers.update(url_class.GetHeaderOverrides())

        if url_class is None or url_class.GetURLType() in (HC.URL_TYPE_FILE, HC.URL_TYPE_UNKNOWN):
            # start from the bytes already read
            headers['Range'] = 'bytes={}-'.format(self._num_bytes_read)

        if HG.network_report_mode:
            HydrusData.ShowText('Network Jobs Referral URLs for {}:{}Given: {}{}Used: {}'.format(url, os.linesep, self._referral_url, os.linesep, referral_url))

        if referral_url is not None:
            try:
                referral_url.encode('latin-1')
            except UnicodeEncodeError:
                # quick and dirty way to quote this url when it comes here with full unicode chars. not perfect, but does the job
                referral_url = urllib.parse.quote(referral_url, "!#$%&'()*+,/:;=?@[]~")

                if HG.network_report_mode:
                    HydrusData.ShowText('Network Jobs Quoted Referral URL for {}:{}{}'.format(url, os.linesep, referral_url))

            headers['referer'] = referral_url

        for (key, value) in self._additional_headers.items():
            headers[key] = value

        if self._num_bytes_read == 0:
            self._status_text = 'sending request\u2026'

        snc = self._session_network_context

    session = self.engine.session_manager.GetSession(snc)

    (connect_timeout, read_timeout) = self._GetTimeouts()

    response = session.request(method, url, data=data, files=files, headers=headers, stream=True, timeout=(connect_timeout, read_timeout))

    with self._lock:
        if self._body is not None:
            self._ReportDataUsed(len(self._body))

    return response
2018-04-18 22:10:15 +00:00
def _SetCancelled ( self ) :
self . _is_cancelled = True
self . _SetDone ( )
def _SetError(self, e, error):
    """Store the exception and its text on the job, then mark the job done.

    e: the exception object.
    error: the accompanying error text.

    Both are also shown to the user when HG.network_report_mode is on.
    """
    self._error_exception = e
    self._error_text = error

    if HG.network_report_mode:
        HydrusData.ShowText('Network error should follow:')
        HydrusData.ShowException(e)
        HydrusData.ShowText(error)

    self._SetDone()
def _SetDone ( self ) :
self . _is_done = True
self . _is_done_event . set ( )
2021-09-01 21:09:01 +00:00
def _Sleep(self, seconds_float):
    """Set the job's wake time to seconds_float after HydrusData.GetNowFloat().

    This does not block; it only records _wake_time_float.
    """
    self._wake_time_float = HydrusData.GetNowFloat() + seconds_float
2018-04-18 22:10:15 +00:00
2020-04-16 00:09:42 +00:00
def _SolveCloudFlare(self, response):
    """Inspect a response for CloudFlare blocks and try to solve a challenge.

    Does nothing when cloudscraper is unavailable or when its detection
    itself errors (the error is printed and the method returns).

    Raises HydrusExceptions.CloudFlareException for a firewall block or a
    failed solve, and HydrusExceptions.ShouldReattemptNetworkException after
    a successful solve so the request is retried with the new cookies.
    """
    if not CLOUDSCRAPER_OK:
        return

    def probe(candidates):
        # run each available detector in order, stopping at the first hit
        result = False

        for (holder, method_name) in candidates:
            if holder is None or not hasattr(holder, method_name):
                continue

            result = getattr(holder, method_name)(response)

            if result:
                break

        return result

    try:
        # cloudscraper refactored a bit around 1.2.60, so we now have some different paths to what we want
        old_module = getattr(cloudscraper, 'CloudScraper', None)

        cloudflare_submodule = getattr(cloudscraper, 'cloudflare', None)
        new_module = getattr(cloudflare_submodule, 'Cloudflare', None)

        is_firewall = probe([
            (old_module, 'is_Firewall_Blocked'),
            (new_module, 'is_Firewall_Blocked'),
        ])

        is_captcha = probe([
            (old_module, 'is_reCaptcha_Challenge'),
            (old_module, 'is_Captcha_Challenge'),
            (new_module, 'is_Captcha_Challenge'),
        ])

        is_iuam = probe([
            (old_module, 'is_IUAM_Challenge'),
            (new_module, 'is_IUAM_Challenge'),
            (new_module, 'is_New_IUAM_Challenge'),
        ])

        is_attemptable = is_captcha or is_iuam
    except Exception as e:
        HydrusData.Print('cloudflarescraper had an error looking at "{}" response: {}'.format(self._url, str(e)))
        HydrusData.PrintException(e)

        return

    if is_firewall:
        raise HydrusExceptions.CloudFlareException('It looks like the site has Firewall-Blocked your IP or IP range with CloudFlare.')

    if not is_attemptable:
        return

    try:
        with self._lock:
            ncs = list(self._network_contexts)
            snc = self._session_network_context

        headers = self.engine.domain_manager.GetHeaders(ncs)

        if 'User-Agent' not in headers:
            raise HydrusExceptions.CloudFlareException('No User-Agent set for hydrus!')

        user_agent = headers['User-Agent']

        (cf_tokens, user_agent) = cloudscraper.get_tokens(self._url, browser={'custom': user_agent})

        session = self.engine.session_manager.GetSession(snc)

        # drop any existing __cf cookies before adding the fresh tokens
        cf_cookies = [cookie for cookie in session.cookies if cookie.name.startswith('__cf')]

        for cookie in cf_cookies:
            session.cookies.clear(cookie.domain, cookie.path, cookie.name)

        domain = '.{}'.format(ClientNetworkingFunctions.ConvertURLIntoSecondLevelDomain(self._url))
        path = '/'
        expires = HydrusData.GetNow() + 30 * 86400
        secure = True
        rest = {'HttpOnly': None, 'SameSite': 'None'}

        for (name, value) in cf_tokens.items():
            ClientNetworkingFunctions.AddCookieToSession(session, name, value, domain, path, expires, secure=secure, rest=rest)

        self.engine.session_manager.SetSessionDirty(snc)
    except Exception as e:
        # keep the underlying failure attached as the cause
        raise HydrusExceptions.CloudFlareException('This looks like an unsolvable CloudFlare captcha! Best solution we know of is to copy cookies and User-Agent header from your web browser to hydrus!') from e

    raise HydrusExceptions.ShouldReattemptNetworkException('CloudFlare needed solving.')
2022-03-30 20:28:13 +00:00
def _WaitOnConnectionError(self, status_text: str):
    """Block until the connection-error backoff ends or the job is cancelled.

    The wait is (connection attempt number - 1) times the
    'connection_error_wait_time' option. The status text is refreshed every
    second, and afterwards the method also waits out any global traffic pause.
    """
    connection_error_wait_time = HG.client_controller.new_options.GetInteger('connection_error_wait_time')

    backoff_multiplier = self._current_connection_attempt_number - 1

    self._connection_error_wake_time = HydrusData.GetNow() + (backoff_multiplier * connection_error_wait_time)

    while not HydrusData.TimeHasPassed(self._connection_error_wake_time) and not self._IsCancelled():
        with self._lock:
            self._status_text = '{} - retrying in {}'.format(status_text, ClientData.TimestampToPrettyTimeDelta(self._connection_error_wake_time))

        time.sleep(1)

    self._WaitOnNetworkTrafficPaused(status_text)
def _WaitOnNetworkTrafficPaused(self, status_text: str):
    """Block while the 'pause_all_new_network_traffic' option is on and the job is not cancelled.

    The status text is refreshed once a second.
    """
    while HG.client_controller.new_options.GetBoolean('pause_all_new_network_traffic') and not self._IsCancelled():
        with self._lock:
            self._status_text = '{} - now waiting because all network traffic is paused'.format(status_text)

        time.sleep(1)
2018-04-18 22:10:15 +00:00
def _WaitOnOngoingBandwidth ( self ) :
while not self . _OngoingBandwidthOK ( ) and not self . _IsCancelled ( ) :
time . sleep ( 0.1 )
2022-03-30 20:28:13 +00:00
def _WaitOnServersideBandwidth(self, status_text: str):
    """Block until the serverside-bandwidth backoff ends or the job is cancelled.

    The wait scales with (connection attempts + request attempts - 1) times
    the 'serverside_bandwidth_wait_time' option. Afterwards the method also
    waits out any global traffic pause.
    """
    # 429 or 509 response from server. basically means 'I'm under big load mate'
    # a future version of this could def talk to domain manager and add a temp delay so other network jobs can be informed
    serverside_bandwidth_wait_time = HG.client_controller.new_options.GetInteger('serverside_bandwidth_wait_time')

    problem_rating = (self._current_connection_attempt_number + self._current_request_attempt_number) - 1

    self._serverside_bandwidth_wake_time = HydrusData.GetNow() + (problem_rating * serverside_bandwidth_wait_time)

    while not HydrusData.TimeHasPassed(self._serverside_bandwidth_wake_time) and not self._IsCancelled():
        with self._lock:
            self._status_text = '{} - retrying in {}'.format(status_text, ClientData.TimestampToPrettyTimeDelta(self._serverside_bandwidth_wake_time))

        time.sleep(1)

    self._WaitOnNetworkTrafficPaused(status_text)
2019-11-28 01:11:46 +00:00
2018-04-18 22:10:15 +00:00
def AddAdditionalHeader(self, key, value):
    """Store an extra request header on the job, replacing any existing value for key."""
    with self._lock:
        self._additional_headers[key] = value
def BandwidthOK(self):
    """Return True if the job may start now as far as bandwidth is concerned.

    Jobs that do not obey bandwidth rules always get True; otherwise the
    engine's bandwidth manager decides.
    """
    with self._lock:
        if not self._ObeysBandwidth():
            return True

        return self.engine.bandwidth_manager.CanDoWork(self._network_contexts)
2018-10-31 21:41:14 +00:00
def Cancel(self, status_text=None):
    """Cancel the job, setting its status text.

    status_text: text to show; defaults to 'cancelled!' when None.
    """
    with self._lock:
        if status_text is None:
            status_text = 'cancelled!'

        self._status_text = status_text

        self._SetCancelled()
def CanValidateInPopup(self):
    """Return the domain manager's CanValidateInPopup answer for this job's network contexts."""
    with self._lock:
        return self.engine.domain_manager.CanValidateInPopup(self._network_contexts)
def CheckCanLogin(self):
    """Ask the login manager whether this job's login context can log in.

    Returns whatever the login manager's CheckCanLogin returns.

    Raises HydrusExceptions.ValidationException when called on a login job.
    """
    with self._lock:
        if self._for_login:
            raise HydrusExceptions.ValidationException('Login jobs should not be asked if they can login!')

        return self.engine.login_manager.CheckCanLogin(self._login_network_context)
2019-08-07 22:59:53 +00:00
def CurrentlyWaitingOnConnectionError(self):
    """Return True while the connection-error wake time has not yet passed."""
    with self._lock:
        wake_time_passed = HydrusData.TimeHasPassed(self._connection_error_wake_time)

    return not wake_time_passed
2019-11-28 01:11:46 +00:00
def CurrentlyWaitingOnServersideBandwidth(self):
    """Return True while the serverside-bandwidth wake time has not yet passed."""
    with self._lock:
        wake_time_passed = HydrusData.TimeHasPassed(self._serverside_bandwidth_wake_time)

    return not wake_time_passed
2020-04-16 00:09:42 +00:00
def DomainOK(self):
    """Ask the domain manager whether this job's URL is on a healthy domain.

    One-shot requests skip the check and always get ``True``. When the
    domain manager says the domain is not OK, the status text is updated and
    the job is put to sleep for 10 seconds before the answer is returned.
    """
    with self._lock:
        if self._this_is_a_one_shot_request:
            return True

        verdict = self.engine.domain_manager.DomainOK(self._url)

        if verdict:
            return verdict

        self._status_text = ' This domain has had several serious errors recently. Waiting a bit. '
        self._Sleep(10)

        return verdict
2018-04-18 22:10:15 +00:00
def GenerateLoginProcess(self):
    """Ask the login manager to build a login process for this job's login context.

    Returns:
        Whatever ``engine.login_manager.GenerateLoginProcess`` returns for
        ``_login_network_context``.

    Raises:
        HydrusExceptions.ValidationException: if this job is itself flagged
            as a login job (``_for_login``).
    """
    with self._lock:
        if self._for_login:
            # use the same specific exception type CheckCanLogin raises for this misuse,
            # rather than a bare Exception that callers cannot distinguish
            raise HydrusExceptions.ValidationException(' Login jobs should not be asked to generate login processes! ')

        return self.engine.login_manager.GenerateLoginProcess(self._login_network_context)
def GenerateValidationPopupProcess(self):
    """Return the domain manager's validation popup process for this job's network contexts."""
    with self._lock:
        domain_manager = self.engine.domain_manager
        return domain_manager.GenerateValidationPopupProcess(self._network_contexts)
2020-12-16 22:29:51 +00:00
def GetActualFetchedURL(self):
    """Return the stored ``_actual_fetched_url``, read under the job lock."""
    with self._lock:
        fetched_url = self._actual_fetched_url

    return fetched_url
2019-01-09 22:59:03 +00:00
def GetContentBytes(self):
    """Rewind the in-memory stream and return everything it holds."""
    with self._lock:
        stream = self._stream_io
        stream.seek(0)

        return stream.read()
2019-01-09 22:59:03 +00:00
def GetContentText(self):
    """Return the downloaded content decoded to text.

    Decoding goes through ``HydrusText.NonFailingUnicodeDecode`` using the
    job's current ``_encoding``; the encoding that call reports back is
    stored on the job.
    """
    raw_bytes = self.GetContentBytes()

    (decoded_text, self._encoding) = HydrusText.NonFailingUnicodeDecode(raw_bytes, self._encoding)

    return decoded_text
2018-04-18 22:10:15 +00:00
def GetContentType(self):
    """Return the stored ``_content_type``, read under the job lock."""
    with self._lock:
        content_type = self._content_type

    return content_type
def GetCreationTime(self):
    """Return the stored ``_creation_time``, read under the job lock."""
    with self._lock:
        created = self._creation_time

    return created
2018-08-22 21:10:59 +00:00
def GetDomain(self):
    """Return the stored ``_domain``, read under the job lock."""
    with self._lock:
        domain = self._domain

    return domain
2018-04-18 22:10:15 +00:00
def GetErrorException(self):
    """Return the stored ``_error_exception`` (``None`` when nothing has been recorded)."""
    with self._lock:
        recorded = self._error_exception

    return recorded
def GetErrorText(self):
    """Return the stored ``_error_text``, read under the job lock."""
    with self._lock:
        text = self._error_text

    return text
2022-03-16 02:52:54 +00:00
def GetLastModifiedTime(self) -> typing.Optional[int]:
    """Return the stored ``_response_last_modified`` value, or ``None`` if there is none."""
    with self._lock:
        last_modified = self._response_last_modified

    return last_modified
2018-11-28 22:31:04 +00:00
def GetLoginNetworkContext(self):
    """Return the stored ``_login_network_context``, read under the job lock."""
    with self._lock:
        login_context = self._login_network_context

    return login_context
2018-04-18 22:10:15 +00:00
def GetNetworkContexts(self):
    """Return a new list holding this job's network contexts.

    The caller gets a copy, so mutating the result does not touch the job's
    own collection.
    """
    with self._lock:
        contexts_copy = list(self._network_contexts)

    return contexts_copy
2018-08-22 21:10:59 +00:00
def GetSecondLevelDomain(self):
    """Return the stored ``_second_level_domain``, read under the job lock."""
    with self._lock:
        second_level_domain = self._second_level_domain

    return second_level_domain
2018-10-17 21:00:09 +00:00
def GetSession(self):
    """Return the session the engine's session manager holds for this job's session context.

    The session network context is copied out under the job lock; the
    session manager is then queried with that copy.
    """
    with self._lock:
        session_context = self._session_network_context

    return self.engine.session_manager.GetSession(session_context)
2018-04-18 22:10:15 +00:00
def GetStatus(self):
    """Return ``(status_text, usage, num_bytes_read, num_bytes_to_read)``.

    ``usage`` is what the job's own bandwidth tracker reports from
    ``GetUsage(HC.BANDWIDTH_TYPE_DATA, 1)``.
    """
    with self._lock:
        status_text = self._status_text
        recent_usage = self._bandwidth_tracker.GetUsage(HC.BANDWIDTH_TYPE_DATA, 1)

        return (status_text, recent_usage, self._num_bytes_read, self._num_bytes_to_read)
def GetTotalDataUsed(self):
    """Return the bandwidth tracker's ``GetUsage(HC.BANDWIDTH_TYPE_DATA, None)`` figure for this job."""
    with self._lock:
        tracker = self._bandwidth_tracker

        return tracker.GetUsage(HC.BANDWIDTH_TYPE_DATA, None)
def GetURL(self):
    """Return the stored ``_url``, read under the job lock."""
    with self._lock:
        url = self._url

    return url
def HasError(self):
    """Return True when an error exception has been recorded on this job."""
    with self._lock:
        recorded = self._error_exception

    return recorded is not None
def IsAsleep(self):
    """Return True while ``_wake_time_float`` has not yet passed."""
    with self._lock:
        wake_time_passed = HydrusData.TimeHasPassedFloat(self._wake_time_float)

    return not wake_time_passed
2018-04-18 22:10:15 +00:00
def IsCancelled(self):
    """Locked public wrapper around ``_IsCancelled()``."""
    with self._lock:
        cancelled = self._IsCancelled()

    return cancelled
2022-05-18 20:18:25 +00:00
def IsCloudFlareCache(self):
    """Return True when the recorded response Server header equals the cloudflare marker.

    A missing header (``None``) simply compares unequal and yields False.
    """
    with self._lock:
        server_header = self._response_server_header

    return server_header == ' cloudflare '
2018-04-18 22:10:15 +00:00
def IsDone(self):
    """Locked public wrapper around ``_IsDone()``."""
    with self._lock:
        done = self._IsDone()

    return done
2018-11-28 22:31:04 +00:00
def IsHydrusJob(self):
    """Return False; this implementation does not identify itself as a hydrus job."""
    with self._lock:
        is_hydrus_job = False

    return is_hydrus_job
2018-04-18 22:10:15 +00:00
def IsValid(self):
    """Return the domain manager's ``IsValid`` answer for this job's network contexts."""
    with self._lock:
        domain_manager = self.engine.domain_manager
        return domain_manager.IsValid(self._network_contexts)
def NeedsLogin(self):
    """Report whether this job still needs a login before it can run.

    Login jobs (``_for_login``) never need one; for everything else the
    login manager's ``NeedsLogin`` answer for the login context is returned.
    """
    with self._lock:
        if self._for_login:
            return False

        return self.engine.login_manager.NeedsLogin(self._login_network_context)
def NoEngineYet(self):
    """Return True while ``self.engine`` is still ``None``."""
    engine = self.engine

    return engine is None
def ObeysBandwidth(self):
    """Public wrapper that returns the result of ``_ObeysBandwidth()``."""
    obeys = self._ObeysBandwidth()

    return obeys
2019-07-03 22:49:27 +00:00
def OnlyTryConnectionOnce(self):
    """Flag this job as a one-shot request by setting ``_this_is_a_one_shot_request``."""
    one_shot = True
    self._this_is_a_one_shot_request = one_shot
2019-07-03 22:49:27 +00:00
2018-04-18 22:10:15 +00:00
def OverrideBandwidth(self, delay=None):
    """Override bandwidth rules for this job, immediately or after ``delay``.

    With no ``delay`` the manual override flag is set and the wake time is
    zeroed. With a ``delay`` the override timestamp is set to now plus
    ``delay`` and the wake time is pulled in to at most one second after it.
    """
    with self._lock:
        if delay is not None:
            self._bandwidth_manual_override_delayed_timestamp = HydrusData.GetNow() + delay

            latest_wake_time = self._bandwidth_manual_override_delayed_timestamp + 1.0
            self._wake_time_float = min(self._wake_time_float, latest_wake_time)

            return

        self._bandwidth_manual_override = True
        self._wake_time_float = 0.0
2018-04-18 22:10:15 +00:00
2019-08-07 22:59:53 +00:00
def OverrideConnectionErrorWait(self):
    """Reset the connection-error wake time to zero."""
    with self._lock:
        no_wait = 0
        self._connection_error_wake_time = no_wait
2019-11-28 01:11:46 +00:00
def OverrideServersideBandwidthWait(self):
    """Reset the serverside-bandwidth wake time to zero."""
    with self._lock:
        no_wait = 0
        self._serverside_bandwidth_wake_time = no_wait
2018-09-19 21:54:51 +00:00
def OverrideToken(self):
    """Mark the gallery token as already consumed and zero the wake time."""
    with self._lock:
        self._wake_time_float = 0.0
        self._gallery_token_consumed = True
2018-09-19 21:54:51 +00:00
2021-10-06 20:59:30 +00:00
def ScrubDomainErrors(self):
    """Have the domain manager scrub errors for this job's URL, then zero the wake time."""
    with self._lock:
        domain_manager = self.engine.domain_manager
        domain_manager.ScrubDomainErrors(self._url)

        self._wake_time_float = 0.0
2022-01-05 22:15:56 +00:00
def SetError(self, e: Exception, error: str):
    """Locked public wrapper that forwards ``e`` and ``error`` to ``_SetError``."""
    with self._lock:
        record_error = self._SetError
        record_error(e, error)
def SetFiles(self, files):
    """Store ``files`` on the job as ``_files``, under the job lock."""
    lock = self._lock

    with lock:
        self._files = files
2018-06-06 21:27:02 +00:00
def SetFileImportOptions(self, file_import_options):
    """Store ``file_import_options`` on the job, under the job lock."""
    lock = self._lock

    with lock:
        self._file_import_options = file_import_options
2022-01-05 22:15:56 +00:00
def SetForLogin(self, for_login: bool):
    """Set the ``_for_login`` flag that marks this job as part of a login process."""
    lock = self._lock

    with lock:
        self._for_login = for_login
2022-01-05 22:15:56 +00:00
def SetGalleryToken(self, token_name: str):
    """Store ``token_name`` as this job's ``_gallery_token_name``."""
    lock = self._lock

    with lock:
        self._gallery_token_name = token_name
2022-01-05 22:15:56 +00:00
def SetStatus(self, text: str):
    """Replace the job's status text with ``text``."""
    lock = self._lock

    with lock:
        self._status_text = text
def Sleep(self, seconds):
    """Locked public wrapper that forwards ``seconds`` to ``_Sleep``."""
    with self._lock:
        nap = self._Sleep
        nap(seconds)
def Start(self):
    """Run the job: send the request, read the response, and record the outcome.

    The request is sent in a loop that repeats until one attempt completes,
    the job is cancelled, or a handler raises. Recoverable failures reset
    the attempt state, check the relevant reattempt budget, wait, and loop
    again. Anything that escapes the loop is recorded on the job via
    ``_SetError``; the job is always marked done on the way out.

    NOTE(review): block nesting here was rebuilt from a flattened copy of
    this file - confirm against the canonical source.
    """
    try:
        with self._lock:
            self._is_started = True
            self._status_text = ' job started '

        request_completed = False

        while not request_completed:
            # checked at the top of every attempt, so a cancel during a wait ends the loop
            if self._IsCancelled():
                return

            response = None

            try:
                response = self._SendRequestAndGetResponse()

                # I think tbh I would rather tell requests not to do 3XX, which is possible with allow_redirects = False on request, and then just raise various 3XX exceptions with url info, so I can requeue easier and keep a record
                # figuring out correct new url seems a laugh, requests has slight helpers, but lots of exceptions
                # SessionRedirectMixin here https://requests.readthedocs.io/en/latest/_modules/requests/sessions/
                # but this will do as a patch for now
                self._actual_fetched_url = response.url

                if self._actual_fetched_url != self._url and HG.network_report_mode:
                    HydrusData.ShowText(' Network Jobs Redirect: {} -> {} '.format(self._url, self._actual_fetched_url))

                self._ParseFirstResponseHeaders(response)

                if response.ok:
                    with self._lock:
                        self._status_text = ' downloading \u2026 '

                    # without a temp path the body goes to the in-memory stream, otherwise to a file we open here
                    if self._temp_path is None:
                        stream_dest = self._stream_io
                    else:
                        stream_dest = open(self._temp_path, ' wb ')

                    try:
                        more_to_download = True

                        # _ReadResponse returns a truthy value when another request is needed for the rest
                        while more_to_download:
                            more_to_download = self._ReadResponse(response, stream_dest)

                            if more_to_download:
                                with self._lock:
                                    self._status_text = ' downloading next part \u2026 '

                                # this will magically have new Range header
                                response = self._SendRequestAndGetResponse()

                                if not response.ok:
                                    raise HydrusExceptions.NetworkException(' Ranged response failed {} '.format(response.status_code))
                    finally:
                        # only close what we opened; the in-memory stream is read later by GetContentBytes
                        if self._temp_path is not None:
                            stream_dest.close()

                    with self._lock:
                        # we are complete here and worked ok
                        self._GenerateModifiedDate(response)

                        if ' Server ' in response.headers:
                            self._response_server_header = response.headers[' Server ']

                        self._status_text = ' done! '
                else:
                    with self._lock:
                        self._status_text = str(response.status_code) + ' - ' + str(response.reason)

                    # it is important we do this before ReadResponse, as the CF test needs r.text, which is nullified if we first access with iter_content
                    if not self._we_tried_cloudflare_once:
                        self._we_tried_cloudflare_once = True
                        self._SolveCloudFlare(response)

                    # don't care about 'more_to_download' here. lmao if some server ever tried to pull it off anyway
                    self._ReadResponse(response, self._stream_io)

                    data = self.GetContentBytes()

                    with self._lock:
                        (e, error_text) = ConvertStatusCodeAndDataIntoExceptionInfo(response.status_code, data, self.IS_HYDRUS_SERVICE)

                        # these two types are raised so the retry handlers below deal with them;
                        # any other error is recorded on the job and this attempt counts as complete
                        if isinstance(e, (HydrusExceptions.BandwidthException, HydrusExceptions.ShouldReattemptNetworkException)):
                            raise e

                        self._SetError(e, error_text)

                request_completed = True

            except HydrusExceptions.CancelledException:
                with self._lock:
                    self._status_text = ' Cancelled! '

                return

            except HydrusExceptions.BandwidthException as e:
                self._ResetForAnotherAttempt()

                if self._CanReattemptRequest():
                    self.engine.domain_manager.ReportNetworkInfrastructureError(self._url)
                else:
                    raise HydrusExceptions.BandwidthException(' Server reported very limited bandwidth: ' + str(e))

                self._WaitOnServersideBandwidth(' server reported limited bandwidth ')

            except HydrusExceptions.ShouldReattemptNetworkException as e:
                self._ResetForAnotherAttempt()

                if not self._CanReattemptRequest():
                    raise HydrusExceptions.NetworkInfrastructureException(' Ran out of reattempts on this error: ' + str(e))

                self._WaitOnConnectionError(str(e))

            except requests.exceptions.ChunkedEncodingError:
                self._ResetForAnotherAttempt()

                if not self._CanReattemptRequest():
                    raise HydrusExceptions.StreamTimeoutException(' Unable to complete request--it broke mid-way! ')

                self._WaitOnConnectionError(' connection broke mid-request ')

            except requests.exceptions.SSLError as e:
                # note a requests SSLError is a ConnectionError, so careful about catching order here
                # SSL problems are not retried: report the domain and fail the job
                self.engine.domain_manager.ReportNetworkInfrastructureError(self._url)

                raise HydrusExceptions.ConnectionException(' Problem with SSL: {} '.format(str(e)))

            except (requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout):
                # connection-level failures use the connection reattempt budget, not the request one
                self._ResetForAnotherConnectionAttempt()

                if self._CanReattemptConnection():
                    self.engine.domain_manager.ReportNetworkInfrastructureError(self._url)
                else:
                    raise HydrusExceptions.ConnectionException(' Could not connect! ')

                self._WaitOnConnectionError(' connection failed ')

            except requests.exceptions.ReadTimeout:
                self._ResetForAnotherAttempt()

                if not self._CanReattemptRequest():
                    raise HydrusExceptions.StreamTimeoutException(' Connection successful, but reading response timed out! ')

                self._WaitOnConnectionError(' read timed out ')

            except Exception as e:
                if ' \' Retry \' has no attribute ' in str(e):
                    # this is that weird requests 2.25.x(?) urllib3 maybe thread safety error
                    # we'll just try and pause a bit I guess!
                    self._ResetForAnotherConnectionAttempt()

                    if self._CanReattemptConnection():
                        self.engine.domain_manager.ReportNetworkInfrastructureError(self._url)
                    else:
                        raise HydrusExceptions.ConnectionException(' Could not connect! ')

                    self._WaitOnConnectionError(' connection failed, and could not recover neatly ')
                else:
                    # anything unrecognised goes to the outer handler
                    raise

            finally:
                # runs after every attempt, successful or not
                with self._lock:
                    snc = self._session_network_context

                self.engine.session_manager.SetSessionDirty(snc)

                if response is not None:
                    # if full data was not read, the response will hang around in connection pool longer than we want
                    # so just an explicit close here
                    response.close()

    except Exception as e:
        with self._lock:
            trace = traceback.format_exc()

            # these exception types are not printed to the log; everything else is
            if not isinstance(e, (HydrusExceptions.NetworkInfrastructureException, HydrusExceptions.StreamTimeoutException, HydrusExceptions.FileImportRulesException)):
                HydrusData.Print(trace)

            if isinstance(e, HydrusExceptions.NetworkInfrastructureException):
                self.engine.domain_manager.ReportNetworkInfrastructureError(self._url)

            self._status_text = ' Error: ' + str(e)

            self._SetError(e, trace)

    finally:
        # always mark the job done, including on the early cancel returns above
        with self._lock:
            self._SetDone()
2022-01-05 22:15:56 +00:00
def TokensOK(self) -> bool:
    """Try to obtain the gallery token this job needs, if it needs one.

    Returns True when no token is needed (none named, or already consumed)
    or when the bandwidth manager hands one over now. Otherwise the status
    text is updated, the job sleeps for 0.8 seconds, and False is returned.
    """
    with self._lock:
        gtn = self._gallery_token_name
        sld = self._second_level_domain
        token_still_needed = gtn is not None and not self._gallery_token_consumed

    if not token_still_needed:
        return True

    (consumed, next_timestamp) = self.engine.bandwidth_manager.TryToConsumeAGalleryToken(sld, gtn)

    with self._lock:
        if consumed:
            self._status_text = ' starting soon '
            self._gallery_token_consumed = True

            return True

        # if our last estimate came and went within the past three seconds, report that another job got the slot
        missed_recent_slot = HydrusData.TimeHasPassed(self._last_gallery_token_estimate) and not HydrusData.TimeHasPassed(self._last_gallery_token_estimate + 3)

        if missed_recent_slot:
            self._status_text = ' a different {} got the chance to work '.format(self._gallery_token_name)
        else:
            pretty_wait = ClientData.TimestampToPrettyTimeDelta(next_timestamp, just_now_threshold=2, just_now_string=' checking ', no_prefix=True)
            self._status_text = ' waiting to start: {} '.format(pretty_wait)

            self._last_gallery_token_estimate = next_timestamp

        self._Sleep(0.8)

        return False
2018-08-22 21:10:59 +00:00
2020-06-17 21:31:54 +00:00
def TryToStartBandwidth(self):
    """Try to claim bandwidth for one request and report whether the job may start.

    Jobs that do not obey bandwidth just record the request with both
    trackers and return True. Otherwise the bandwidth manager is asked to
    start the request; on success the job's own tracker records it. On
    refusal the status text is set to a waiting message and the job sleeps
    for a period that depends on the expected wait. The bandwidth manager's
    answer is returned.
    """
    with self._lock:
        if not self._ObeysBandwidth():
            self._bandwidth_tracker.ReportRequestUsed()
            self.engine.bandwidth_manager.ReportRequestUsed(self._network_contexts)

            return True

        result = self.engine.bandwidth_manager.TryToStartRequest(self._network_contexts)

        if result:
            self._bandwidth_tracker.ReportRequestUsed()

            return result

        (bandwidth_waiting_duration, bandwidth_network_context) = self.engine.bandwidth_manager.GetWaitingEstimateAndContext(self._network_contexts)

        # a delayed manual override may arrive before the bandwidth frees up
        override_coming_first = False

        if self._bandwidth_manual_override_delayed_timestamp is not None:
            override_waiting_duration = self._bandwidth_manual_override_delayed_timestamp - HydrusData.GetNow()
            override_coming_first = override_waiting_duration < bandwidth_waiting_duration

        just_now_threshold = 2

        if override_coming_first:
            waiting_duration = override_waiting_duration

            pretty_override = ClientData.TimestampToPrettyTimeDelta(self._bandwidth_manual_override_delayed_timestamp, just_now_string=' imminently ', just_now_threshold=just_now_threshold)
            waiting_str = ' overriding bandwidth ' + pretty_override
        else:
            waiting_duration = bandwidth_waiting_duration
            bandwidth_time_estimate = HydrusData.GetNow() + waiting_duration

            # if our last estimate came and went within the past three seconds, report that another job got the slot
            missed_recent_slot = HydrusData.TimeHasPassed(self._last_bandwidth_time_estimate) and not HydrusData.TimeHasPassed(self._last_bandwidth_time_estimate + 3)

            if missed_recent_slot:
                waiting_str = ' a different network job got the bandwidth '
            else:
                pretty_estimate = ClientData.TimestampToPrettyTimeDelta(bandwidth_time_estimate, just_now_string=' imminently ', just_now_threshold=just_now_threshold)
                waiting_str = ' bandwidth free ' + pretty_estimate

                self._last_bandwidth_time_estimate = bandwidth_time_estimate

        waiting_str += ' \u2026 ( ' + bandwidth_network_context.ToHumanString() + ' ) '

        self._status_text = waiting_str

        # the longer the expected wait, the longer the job sleeps before checking again
        if waiting_duration > 1200:
            self._Sleep(30)
        elif waiting_duration > 120:
            self._Sleep(10)
        elif waiting_duration > 10:
            self._Sleep(0.8)

        return result
2018-04-18 22:10:15 +00:00
def WaitUntilDone(self):
    """Block until the job is done, then raise if it did not finish cleanly.

    Raises:
        HydrusExceptions.ShutdownException: if shutdown has started or the
            current thread is shutting down.
        Exception: the recorded error exception, or a generic ``Exception``
            when the recorded error is not an ``Exception`` instance.
        HydrusExceptions.CancelledException: if the job was cancelled.
    """
    # wake at least every five seconds to re-check completion
    while not self.IsDone():
        self._is_done_event.wait(5)

    with self._lock:
        if HG.started_shutdown or HydrusThreading.IsThreadShuttingDown():
            raise HydrusExceptions.ShutdownException()

        recorded_error = self._error_exception

        if recorded_error is not None:
            if isinstance(recorded_error, Exception):
                raise recorded_error

            raise Exception(' Problem in network error handling. ')

        if self._IsCancelled():
            prefix = ' Upload cancelled: ' if self._method == ' POST ' else ' Download cancelled: '

            raise HydrusExceptions.CancelledException(prefix + self._status_text)
2022-01-05 22:15:56 +00:00
def WillingToWaitOnInvalidLogin(self) -> bool:
    """Return this job's ``WILLING_TO_WAIT_ON_INVALID_LOGIN`` setting."""
    willing = self.WILLING_TO_WAIT_ON_INVALID_LOGIN

    return willing
2018-04-18 22:10:15 +00:00
class NetworkJobDownloader(NetworkJob):
    """Network job that also carries a downloader-page network context."""

    def __init__(self, downloader_page_key, method, url, body=None, referral_url=None, temp_path=None):
        """Remember the downloader page key, then run the base initialiser."""
        # stored before the base initialiser runs - presumably because that builds the network contexts; verify
        self._downloader_page_key = downloader_page_key

        NetworkJob.__init__(self, method, url, body=body, referral_url=referral_url, temp_path=temp_path)

    def _GenerateNetworkContexts(self):
        """Return the base job's network contexts plus one for this downloader page."""
        contexts = NetworkJob._GenerateNetworkContexts(self)

        page_context = ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_DOWNLOADER_PAGE, self._downloader_page_key)
        contexts.append(page_context)

        return contexts
class NetworkJobSubscription(NetworkJob):
    """Network job that also carries a subscription network context."""

    # value returned by WillingToWaitOnInvalidLogin for this job type
    WILLING_TO_WAIT_ON_INVALID_LOGIN = False

    def __init__(self, subscription_key, method, url, body=None, referral_url=None, temp_path=None):
        """Remember the subscription key, then run the base initialiser."""
        # stored before the base initialiser runs - presumably because that builds the network contexts; verify
        self._subscription_key = subscription_key

        NetworkJob.__init__(self, method, url, body=body, referral_url=referral_url, temp_path=temp_path)

    def _GenerateNetworkContexts(self):
        """Return the base job's network contexts plus one for this subscription."""
        contexts = NetworkJob._GenerateNetworkContexts(self)

        subscription_context = ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_SUBSCRIPTION, self._subscription_key)
        contexts.append(subscription_context)

        return contexts
2022-03-30 20:28:13 +00:00
def CheckHydrusVersion(service_type, response):
    """Check that ``response`` came from the expected service at our network version.

    The service's name string is looked for in the response's server header,
    then in its hydrus-server header. The part of the matching header after
    the separator is parsed as an integer network version and compared with
    ``HC.NETWORK_VERSION``.

    Raises:
        HydrusExceptions.WrongServiceTypeException: neither header contains
            the expected service string.
        HydrusExceptions.NetworkVersionException: the server's network
            version differs from the client's.
    """
    service_string = HC.service_string_lookup[service_type]

    headers = response.headers

    # first header that both exists and mentions the service string wins
    server_header = None

    for header_name in (' server ', ' hydrus-server '):
        if header_name in headers and service_string in headers[header_name]:
            server_header = headers[header_name]

            break

    if server_header is None:
        raise HydrusExceptions.WrongServiceTypeException(' Target was not a ' + service_string + ' ! ')

    # the header must split into exactly two parts, or the unpacking raises ValueError
    (_, version_text) = server_header.split(' / ')

    network_version = int(version_text)

    if network_version == HC.NETWORK_VERSION:
        return

    if network_version > HC.NETWORK_VERSION:
        message = ' Your client is out of date; please download the latest release. '
    else:
        message = ' The server is out of date; please ask its admin to update to the latest release. '

    raise HydrusExceptions.NetworkVersionException(' Network version mismatch! The server \' s network version was ' + str(network_version) + ' , whereas your client \' s is ' + str(HC.NETWORK_VERSION) + ' ! ' + message)
2018-04-18 22:10:15 +00:00
2022-03-30 20:28:13 +00:00
class NetworkJobHydrus(NetworkJob):
    """Network job aimed at a hydrus service identified by a service key."""

    # value returned by WillingToWaitOnInvalidLogin for this job type
    WILLING_TO_WAIT_ON_INVALID_LOGIN = False
    # handed to ConvertStatusCodeAndDataIntoExceptionInfo when an error response is converted
    IS_HYDRUS_SERVICE = True

    def __init__(self, service_key, method, url, body=None, referral_url=None, temp_path=None):
        """Remember the service key, then run the base initialiser."""
        # stored before the base initialiser runs - presumably because that builds the network contexts; verify
        self._service_key = service_key

        NetworkJob.__init__(self, method, url, body=body, referral_url=referral_url, temp_path=temp_path)

    def _GenerateNetworkContexts(self):
        """Return the global context followed by this service's hydrus context."""
        hydrus_context = ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_HYDRUS, self._service_key)

        return [ClientNetworkingContexts.GLOBAL_NETWORK_CONTEXT, hydrus_context]

    def _GenerateSpecificNetworkContexts(self):
        """Return ``(session_network_context, login_network_context)``; both are the same object."""
        # we store cookies on and login to the same hydrus-specific context
        shared_context = ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_HYDRUS, self._service_key)

        return (shared_context, shared_context)

    def _ReportDataUsed(self, num_bytes):
        """Report ``num_bytes`` to the service account (restricted services only), then to the base job."""
        service = self.engine.controller.services_manager.GetService(self._service_key)

        if service.GetServiceType() in HC.RESTRICTED_SERVICES:
            service.GetAccount().ReportDataUsed(num_bytes)

        NetworkJob._ReportDataUsed(self, num_bytes)

    def _SendRequestAndGetResponse(self) -> requests.Response:
        """Send the request through the base job, with account bookkeeping and a version check.

        For restricted services the request is counted against the account
        before sending, and an OK response is passed to
        ``CheckHydrusVersion``.
        """
        service = self.engine.controller.services_manager.GetService(self._service_key)

        service_type = service.GetServiceType()
        is_restricted = service_type in HC.RESTRICTED_SERVICES

        if is_restricted:
            service.GetAccount().ReportRequestUsed()

        response = NetworkJob._SendRequestAndGetResponse(self)

        if response.ok and is_restricted:
            CheckHydrusVersion(service_type, response)

        return response

    def IsHydrusJob(self):
        """Return True; this job type targets a hydrus service."""
        with self._lock:
            return True
2020-05-13 19:03:16 +00:00
class NetworkJobIPFS(NetworkJob):
    """Network job for IPFS requests: fixed method, one-shot, bandwidth overridden."""

    IS_IPFS_SERVICE = True

    def __init__(self, url, body=None, referral_url=None, temp_path=None):
        """Create the job with a fixed method, then mark it one-shot and override bandwidth."""
        NetworkJob.__init__(self, ' POST ', url, body=body, referral_url=referral_url, temp_path=temp_path)

        self.OnlyTryConnectionOnce()
        self.OverrideBandwidth()

    def _GetTimeouts(self):
        """Return the base timeouts with the read timeout raised to at least 7200."""
        (connect_timeout, base_read_timeout) = NetworkJob._GetTimeouts(self)

        return (connect_timeout, max(7200, base_read_timeout))
2020-05-13 19:03:16 +00:00
2018-04-18 22:10:15 +00:00
class NetworkJobWatcherPage(NetworkJob):
    """Network job that also carries a watcher-page network context."""

    def __init__(self, watcher_key, method, url, body=None, referral_url=None, temp_path=None):
        """Remember the watcher key, then run the base initialiser."""
        # stored before the base initialiser runs - presumably because that builds the network contexts; verify
        self._watcher_key = watcher_key

        NetworkJob.__init__(self, method, url, body=body, referral_url=referral_url, temp_path=temp_path)

    def _GenerateNetworkContexts(self):
        """Return the base job's network contexts plus one for this watcher page."""
        contexts = NetworkJob._GenerateNetworkContexts(self)

        watcher_context = ClientNetworkingContexts.NetworkContext(CC.NETWORK_CONTEXT_WATCHER_PAGE, self._watcher_key)
        contexts.append(watcher_context)

        return contexts