Revert "Better handling of redirects (#1493)"
This reverts commit 5fee29dbd7.
This commit is contained in:
parent
5fee29dbd7
commit
a825fce02e
|
@ -33,7 +33,6 @@ from hydrus.client.importing.options import TagImportOptions
|
|||
from hydrus.client.media import ClientMediaManagers
|
||||
from hydrus.client.metadata import ClientTags
|
||||
from hydrus.client.networking import ClientNetworkingFunctions
|
||||
from hydrus.client.networking import ClientNetworkingJobs
|
||||
|
||||
FILE_SEED_TYPE_HDD = 0
|
||||
FILE_SEED_TYPE_URL = 1
|
||||
|
@ -642,7 +641,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ):
|
|||
|
||||
url_to_fetch = HG.client_controller.network_engine.domain_manager.GetURLToFetch( file_url )
|
||||
|
||||
network_job: ClientNetworkingJobs.NetworkJob = network_job_factory( 'GET', url_to_fetch, temp_path = temp_path, referral_url = referral_url )
|
||||
network_job = network_job_factory( 'GET', url_to_fetch, temp_path = temp_path, referral_url = referral_url )
|
||||
|
||||
for ( key, value ) in self._request_headers.items():
|
||||
|
||||
|
@ -668,15 +667,13 @@ class FileSeed( HydrusSerialisable.SerialisableBase ):
|
|||
self._AddPrimaryURLs( ( url_to_fetch, ) )
|
||||
|
||||
|
||||
#actual_fetched_url = network_job.GetActualFetchedURL()
|
||||
actual_fetched_url = network_job.GetActualFetchedURL()
|
||||
|
||||
redirected_url = network_job.GetRedirectedUrl()
|
||||
|
||||
if redirected_url is not None:
|
||||
if actual_fetched_url not in ( file_url, url_to_fetch ):
|
||||
|
||||
self._AddPrimaryURLs( ( redirected_url, ) )
|
||||
self._AddPrimaryURLs( ( actual_fetched_url, ) )
|
||||
|
||||
( actual_url_type, actual_match_name, actual_can_parse, actual_cannot_parse_reason ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( redirected_url )
|
||||
( actual_url_type, actual_match_name, actual_can_parse, actual_cannot_parse_reason ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( actual_fetched_url )
|
||||
|
||||
if actual_url_type == HC.URL_TYPE_POST and actual_can_parse:
|
||||
|
||||
|
@ -684,7 +681,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ):
|
|||
|
||||
if file_seed_cache is None:
|
||||
|
||||
raise Exception( 'The downloader thought it had a raw file url with "{}", but that redirected to the apparent Post URL "{}", but then there was no file log in which to queue that download!'.format( file_url, redirected_url ) )
|
||||
raise Exception( 'The downloader thought it had a raw file url with "{}", but that redirected to the apparent Post URL "{}", but then there was no file log in which to queue that download!'.format( file_url, actual_fetched_url ) )
|
||||
|
||||
else:
|
||||
|
||||
|
@ -692,10 +689,10 @@ class FileSeed( HydrusSerialisable.SerialisableBase ):
|
|||
|
||||
if original_url_type == actual_url_type and original_match_name == actual_match_name:
|
||||
|
||||
raise Exception( 'The downloader thought it had a raw file url with "{}", but that redirected to the apparent Post URL "{}". As that URL has the same class as this import job\'s original URL, we are stopping here in case this is a looping redirect!'.format( file_url, redirected_url ) )
|
||||
raise Exception( 'The downloader thought it had a raw file url with "{}", but that redirected to the apparent Post URL "{}". As that URL has the same class as this import job\'s original URL, we are stopping here in case this is a looping redirect!'.format( file_url, actual_fetched_url ) )
|
||||
|
||||
|
||||
file_seed = FileSeed( FILE_SEED_TYPE_URL, redirected_url )
|
||||
file_seed = FileSeed( FILE_SEED_TYPE_URL, actual_fetched_url )
|
||||
|
||||
file_seed.SetReferralURL( file_url )
|
||||
|
||||
|
@ -1383,19 +1380,19 @@ class FileSeed( HydrusSerialisable.SerialisableBase ):
|
|||
|
||||
parsing_text = network_job.GetContentText()
|
||||
|
||||
redirected_url = network_job.GetRedirectedUrl()
|
||||
actual_fetched_url = network_job.GetActualFetchedURL()
|
||||
|
||||
if redirected_url is not None:
|
||||
if actual_fetched_url != url_to_check:
|
||||
|
||||
# we have redirected, a 3XX response
|
||||
|
||||
( actual_url_type, actual_match_name, actual_can_parse, actual_cannot_parse_reason ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( redirected_url )
|
||||
( actual_url_type, actual_match_name, actual_can_parse, actual_cannot_parse_reason ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( actual_fetched_url )
|
||||
|
||||
if actual_url_type == HC.URL_TYPE_POST and actual_can_parse:
|
||||
|
||||
self._AddPrimaryURLs( ( redirected_url, ) )
|
||||
self._AddPrimaryURLs( ( actual_fetched_url, ) )
|
||||
|
||||
post_url = redirected_url
|
||||
post_url = actual_fetched_url
|
||||
|
||||
url_for_child_referral = post_url
|
||||
|
||||
|
|
|
@ -447,19 +447,19 @@ class GallerySeed( HydrusSerialisable.SerialisableBase ):
|
|||
|
||||
parsing_text = network_job.GetContentText()
|
||||
|
||||
redirected_url = network_job.GetRedirectedUrl()
|
||||
actual_fetched_url = network_job.GetActualFetchedURL()
|
||||
|
||||
do_parse = True
|
||||
|
||||
if redirected_url is not None:
|
||||
if actual_fetched_url != url_to_check:
|
||||
|
||||
( url_type, match_name, can_parse, cannot_parse_reason ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( redirected_url )
|
||||
( url_type, match_name, can_parse, cannot_parse_reason ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( actual_fetched_url )
|
||||
|
||||
if url_type == HC.URL_TYPE_GALLERY:
|
||||
|
||||
if can_parse:
|
||||
|
||||
gallery_url = redirected_url
|
||||
gallery_url = actual_fetched_url
|
||||
|
||||
url_for_child_referral = gallery_url
|
||||
|
||||
|
@ -480,7 +480,7 @@ class GallerySeed( HydrusSerialisable.SerialisableBase ):
|
|||
|
||||
from hydrus.client.importing import ClientImportFileSeeds
|
||||
|
||||
file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, redirected_url )
|
||||
file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, actual_fetched_url )
|
||||
|
||||
file_seed.SetReferralURL( url_for_child_referral )
|
||||
|
||||
|
|
|
@ -180,8 +180,6 @@ class NetworkJob( object ):
|
|||
self._actual_fetched_url = self._url
|
||||
self._temp_path = temp_path
|
||||
|
||||
self._redirected_url = None
|
||||
|
||||
self._response_server_header = None
|
||||
self._response_last_modified = None
|
||||
|
||||
|
@ -787,11 +785,11 @@ class NetworkJob( object ):
|
|||
snc = self._session_network_context
|
||||
|
||||
|
||||
session: requests.Session = self.engine.session_manager.GetSession( snc )
|
||||
session = self.engine.session_manager.GetSession( snc )
|
||||
|
||||
( connect_timeout, read_timeout ) = self._GetTimeouts()
|
||||
|
||||
response = session.request( method, url, data = data, files = files, headers = headers, stream = True, timeout = ( connect_timeout, read_timeout ), allow_redirects = False )
|
||||
response = session.request( method, url, data = data, files = files, headers = headers, stream = True, timeout = ( connect_timeout, read_timeout ) )
|
||||
|
||||
with self._lock:
|
||||
|
||||
|
@ -1223,14 +1221,6 @@ class NetworkJob( object ):
|
|||
|
||||
|
||||
|
||||
def GetRedirectedUrl( self ):
|
||||
|
||||
with self._lock:
|
||||
|
||||
return self._redirected_url
|
||||
|
||||
|
||||
|
||||
def GetContentBytes( self ):
|
||||
|
||||
with self._lock:
|
||||
|
@ -1322,7 +1312,7 @@ class NetworkJob( object ):
|
|||
|
||||
|
||||
|
||||
def GetSession( self ) -> requests.Session:
|
||||
def GetSession( self ):
|
||||
|
||||
with self._lock:
|
||||
|
||||
|
@ -1564,21 +1554,20 @@ class NetworkJob( object ):
|
|||
|
||||
response = self._SendRequestAndGetResponse()
|
||||
|
||||
if response.is_redirect:
|
||||
# I think tbh I would rather tell requests not to do 3XX, which is possible with allow_redirects = False on request, and then just raise various 3XX exceptions with url info, so I can requeue easier and keep a record
|
||||
# figuring out correct new url seems a laugh, requests has slight helpers, but lots of exceptions
|
||||
# SessionRedirectMixin here https://requests.readthedocs.io/en/latest/_modules/requests/sessions/
|
||||
# but this will do as a patch for now
|
||||
self._actual_fetched_url = response.url
|
||||
|
||||
if self._actual_fetched_url != self._url and HG.network_report_mode:
|
||||
|
||||
session = self.GetSession()
|
||||
|
||||
self._redirected_url = session.get_redirect_target( response )
|
||||
|
||||
if HG.network_report_mode:
|
||||
|
||||
HydrusData.ShowText( 'Network Jobs Redirect: {} -> {}'.format( self._url, self._redirected_url ) )
|
||||
|
||||
HydrusData.ShowText( 'Network Jobs Redirect: {} -> {}'.format( self._url, self._actual_fetched_url ) )
|
||||
|
||||
|
||||
elif response.ok:
|
||||
|
||||
self._ParseFirstResponseHeaders( response )
|
||||
self._ParseFirstResponseHeaders( response )
|
||||
|
||||
if response.ok:
|
||||
|
||||
with self._lock:
|
||||
|
||||
|
@ -1643,8 +1632,6 @@ class NetworkJob( object ):
|
|||
|
||||
else:
|
||||
|
||||
self._ParseFirstResponseHeaders( response )
|
||||
|
||||
with self._lock:
|
||||
|
||||
self._status_text = str( response.status_code ) + ' - ' + str( response.reason )
|
||||
|
|
Loading…
Reference in New Issue