2022-01-05 22:15:56 +00:00
import typing
import urllib . parse
from hydrus . core import HydrusConstants as HC
from hydrus . core import HydrusData
from hydrus . core import HydrusExceptions
from hydrus . core import HydrusSerialisable
2023-04-19 20:38:13 +00:00
from hydrus . core import HydrusTime
2022-01-05 22:15:56 +00:00
from hydrus . client import ClientStrings
from hydrus . client . networking import ClientNetworkingFunctions
# Gallery index type enum values.
GALLERY_INDEX_TYPE_PATH_COMPONENT = 0
GALLERY_INDEX_TYPE_PARAMETER = 1

# Referral url policy enum values.
SEND_REFERRAL_URL_ONLY_IF_PROVIDED = 0
SEND_REFERRAL_URL_NEVER = 1
SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED = 2
SEND_REFERRAL_URL_ONLY_CONVERTER = 3

# Every referral url policy, in ascending enum order.
SEND_REFERRAL_URL_TYPES = [
    SEND_REFERRAL_URL_ONLY_IF_PROVIDED,
    SEND_REFERRAL_URL_NEVER,
    SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED,
    SEND_REFERRAL_URL_ONLY_CONVERTER,
]

# Human-readable label for each referral url policy.
send_referral_url_string_lookup = {
    SEND_REFERRAL_URL_ONLY_IF_PROVIDED: ' send a referral url if available ',
    SEND_REFERRAL_URL_NEVER: ' never send a referral url ',
    SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED: ' use the converter if no referral is available ',
    SEND_REFERRAL_URL_ONLY_CONVERTER: ' always use the converter referral url ',
}
def ConvertURLClassesIntoAPIPairs(url_classes):
    """Pair every API-using url class with the url class that matches its API url.

    The input is copied, and the copy is ordered with
    SortURLClassesListDescendingComplexity before any matching happens. For
    each url class whose UsesAPIURL() is true, the API url of its example url
    is computed and the first *other* url class (in sorted order) whose
    Matches() accepts that API url is selected.

    Returns a list of ( url_class, matching_url_class ) tuples. Url classes
    whose API url is matched by no other class contribute nothing.
    """
    ordered_url_classes = list(url_classes)

    SortURLClassesListDescendingComplexity(ordered_url_classes)

    api_pairs = []

    for source_url_class in ordered_url_classes:
        if not source_url_class.UsesAPIURL():
            continue

        api_url = source_url_class.GetAPIURL(source_url_class.GetExampleURL())

        # first hit wins; a url class is never paired with itself
        api_url_class = next(
            (
                candidate
                for candidate in ordered_url_classes
                if not (candidate == source_url_class) and candidate.Matches(api_url)
            ),
            None,
        )

        if api_url_class is not None:
            api_pairs.append((source_url_class, api_url_class))

    return api_pairs
def SortURLClassesListDescendingComplexity(url_classes: typing.List["URLClass"]):
    """Sort url_classes in place so that the most complex url classes come first.

    Each item is ranked by its GetSortingComplexityKey(); per the original note
    that key is
    ( num_path_components, num_required_parameters, num_total_parameters, len_example_url ).
    Nothing is returned.
    """
    def complexity_of(url_class):
        return url_class.GetSortingComplexityKey()

    # reverse = True puts the largest keys, i.e. the most complex classes, at the front
    url_classes.sort(key=complexity_of, reverse=True)
2024-03-20 21:10:16 +00:00
class URLClassParameterFixedName(HydrusSerialisable.SerialisableBase):
    """A url query parameter identified by a fixed name.

    Holds the parameter name, a StringMatch used to validate the value, an
    optional default value (with a StringProcessor that can transform that
    default), and an 'ephemeral' flag.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME
    SERIALISABLE_NAME = ' URL Class Parameter - Fixed Name '
    SERIALISABLE_VERSION = 2

    def __init__(self, name=None, value_string_match=None):
        """Create the parameter; missing arguments fall back to placeholder values."""
        if name is None:
            name = ' name '

        if value_string_match is None:
            value_string_match = ClientStrings.StringMatch(
                match_type=ClientStrings.STRING_MATCH_FIXED,
                match_value=' value ',
                example_string=' value '
            )

        HydrusSerialisable.SerialisableBase.__init__(self)

        self._name = name
        self._value_string_match = value_string_match

        self._is_ephemeral = False
        self._default_value = None
        self._default_value_string_processor = ClientStrings.StringProcessor()

    def __repr__(self):
        return f' URL Class Parameter - Fixed Name: {self._name} : {self._value_string_match.ToString()} '

    def _GetSerialisableInfo(self):
        """Return ( name, value string match, is_ephemeral, default value, default value processor )."""
        serialisable_value_string_match = self._value_string_match.GetSerialisableTuple()
        serialisable_default_value_string_processor = self._default_value_string_processor.GetSerialisableTuple()

        return (
            self._name,
            serialisable_value_string_match,
            self._is_ephemeral,
            self._default_value,
            serialisable_default_value_string_processor
        )

    def _InitialiseFromSerialisableInfo(self, serialisable_info):
        """Restore state from the tuple produced by _GetSerialisableInfo."""
        (
            self._name,
            serialisable_value_string_match,
            self._is_ephemeral,
            self._default_value,
            serialisable_default_value_string_processor
        ) = serialisable_info

        self._value_string_match = HydrusSerialisable.CreateFromSerialisableTuple(serialisable_value_string_match)
        self._default_value_string_processor = HydrusSerialisable.CreateFromSerialisableTuple(serialisable_default_value_string_processor)

        # Earlier builds of the version 1 -> 2 updater stored a StringConverter in this slot.
        # GetDefaultValue calls ProcessStrings on this object, so anything that is not a
        # StringProcessor is swapped for an empty one (the stored converter was always empty).
        if not isinstance(self._default_value_string_processor, ClientStrings.StringProcessor):
            self._default_value_string_processor = ClientStrings.StringProcessor()

    def _UpdateSerialisableInfo(self, version, old_serialisable_info):
        """Upgrade serialised info by one version; returns ( new_version, new_info )."""
        if version == 1:
            (name, serialisable_value_string_match, default_value) = old_serialisable_info

            is_ephemeral = False

            # must be a StringProcessor to agree with __init__ and GetDefaultValue
            default_value_string_processor = ClientStrings.StringProcessor()
            serialisable_default_value_string_processor = default_value_string_processor.GetSerialisableTuple()

            new_serialisable_info = (
                name,
                serialisable_value_string_match,
                is_ephemeral,
                default_value,
                serialisable_default_value_string_processor
            )

            return (2, new_serialisable_info)

    def GetDefaultValue(self, with_processing=False) -> typing.Optional[str]:
        """Return the default value, or None when no default is set.

        With with_processing = True the default is run through the default
        value string processor and the first result is returned. This is
        best-effort: if processing fails for any ordinary reason (including an
        empty result), the unprocessed default is returned instead.
        """
        if with_processing and self._default_value is not None:
            try:
                result = self._default_value_string_processor.ProcessStrings([self._default_value])

                return result[0]
            except Exception:
                # deliberate fallback, but do not swallow KeyboardInterrupt / SystemExit
                return self._default_value
        else:
            return self._default_value

    def GetDefaultValueStringProcessor(self) -> ClientStrings.StringProcessor:
        return self._default_value_string_processor

    def GetName(self):
        return self._name

    def GetValueStringMatch(self):
        return self._value_string_match

    def HasDefaultValue(self):
        return self._default_value is not None

    def IsEphemeralToken(self):
        return self._is_ephemeral

    def MustBeInOriginalURL(self):
        """True when the parameter has no default value and is not ephemeral."""
        return self._default_value is None and not self.IsEphemeralToken()

    def MatchesName(self, name):
        return self._name == name

    def MatchesValue(self, value):
        return self._value_string_match.Matches(value)

    def SetDefaultValue(self, default_value: typing.Optional[str]):
        self._default_value = default_value

    def SetDefaultValueStringProcessor(self, default_value_string_processor: ClientStrings.StringProcessor):
        self._default_value_string_processor = default_value_string_processor

    def SetIsEphemeral(self, value):
        self._is_ephemeral = value

    def TestValue(self, value):
        """Run the value StringMatch's Test on value; the result is not returned."""
        self._value_string_match.Test(value)


# register the class so it can be recreated from its serialisable type
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME] = URLClassParameterFixedName
2022-01-05 22:15:56 +00:00
class URLClass ( HydrusSerialisable . SerialisableBaseNamed ) :
SERIALISABLE_TYPE = HydrusSerialisable . SERIALISABLE_TYPE_URL_CLASS
SERIALISABLE_NAME = ' URL Class '
2024-03-27 21:47:50 +00:00
SERIALISABLE_VERSION = 14
2022-01-05 22:15:56 +00:00
def __init__(
    self,
    name: str,
    url_class_key=None,
    url_type=None,
    preferred_scheme=' https ',
    netloc=' hostname.com ',
    path_components=None,
    parameters=None,
    has_single_value_parameters=False,
    single_value_parameters_string_match=None,
    header_overrides=None,
    api_lookup_converter=None,
    send_referral_url=SEND_REFERRAL_URL_ONLY_IF_PROVIDED,
    referral_url_converter=None,
    gallery_index_type=None,
    gallery_index_identifier=None,
    gallery_index_delta=1,
    example_url=' https://hostname.com/post/page.php?id=123456&s=view '
):
    """Build a url class; every argument left as None gets a placeholder default.

    The placeholder defaults are a fresh generated key, the post url type, two
    fixed path components, the fixed 's' parameter plus the numeric 'id'
    parameter, an empty StringMatch for single-value parameters, no header
    overrides, and two StringConverters seeded with the example url string.
    """
    if url_class_key is None:
        url_class_key = HydrusData.GenerateKey()

    if url_type is None:
        url_type = HC.URL_TYPE_POST

    if path_components is None:
        # each entry is ( string match, default ), and no defaults are set here
        path_components = [
            (ClientStrings.StringMatch(match_type=ClientStrings.STRING_MATCH_FIXED, match_value=' post ', example_string=' post '), None),
            (ClientStrings.StringMatch(match_type=ClientStrings.STRING_MATCH_FIXED, match_value=' page.php ', example_string=' page.php '), None),
        ]

    if parameters is None:
        view_parameter = URLClassParameterFixedName(
            name=' s ',
            value_string_match=ClientStrings.StringMatch(match_type=ClientStrings.STRING_MATCH_FIXED, match_value=' view ', example_string=' view ')
        )

        id_parameter = URLClassParameterFixedName(
            name=' id ',
            value_string_match=ClientStrings.StringMatch(match_type=ClientStrings.STRING_MATCH_FLEXIBLE, match_value=ClientStrings.NUMERIC, example_string=' 123456 ')
        )

        parameters = [view_parameter, id_parameter]

    if single_value_parameters_string_match is None:
        single_value_parameters_string_match = ClientStrings.StringMatch()

    if header_overrides is None:
        header_overrides = {}

    if api_lookup_converter is None:
        api_lookup_converter = ClientStrings.StringConverter(example_string=' https://hostname.com/post/page.php?id=123456&s=view ')

    if referral_url_converter is None:
        referral_url_converter = ClientStrings.StringConverter(example_string=' https://hostname.com/post/page.php?id=123456&s=view ')

    # callers may hand over plain lists, so both are wrapped in the serialisable list type
    path_components = HydrusSerialisable.SerialisableList(path_components)
    parameters = HydrusSerialisable.SerialisableList(parameters)

    HydrusSerialisable.SerialisableBaseNamed.__init__(self, name)

    # values taken from the arguments
    self._url_class_key = url_class_key
    self._url_type = url_type
    self._preferred_scheme = preferred_scheme
    self._netloc = netloc
    self._path_components = path_components
    self._parameters = parameters
    self._has_single_value_parameters = has_single_value_parameters
    self._single_value_parameters_string_match = single_value_parameters_string_match
    self._header_overrides = header_overrides
    self._api_lookup_converter = api_lookup_converter
    self._send_referral_url = send_referral_url
    self._referral_url_converter = referral_url_converter
    self._gallery_index_type = gallery_index_type
    self._gallery_index_identifier = gallery_index_identifier
    self._gallery_index_delta = gallery_index_delta
    self._example_url = example_url

    # flags that are not constructor arguments and start at fixed values
    self._match_subdomains = False
    self._keep_matched_subdomains = False
    self._alphabetise_get_parameters = True
    self._no_more_path_components_than_this = False
    self._no_more_parameters_than_this = False
    self._can_produce_multiple_files = False
    self._should_be_associated_with_files = True
    self._keep_fragment = False

    # extra parameters are kept for the server unless parameters are capped or the api lookup converter makes changes
    self._keep_extra_parameters_for_server = not (
        self._no_more_parameters_than_this or self._api_lookup_converter.MakesChanges()
    )
def __eq__ ( self , other ) :
if isinstance ( other , URLClass ) :
return self . __hash__ ( ) == other . __hash__ ( )
return NotImplemented
def __hash__ ( self ) :
return ( self . _name , self . _url_class_key ) . __hash__ ( )
2022-01-05 22:15:56 +00:00
def _ClipNetLoc ( self , netloc ) :
if self . _keep_matched_subdomains :
# for domains like artistname.website.com, where removing the subdomain may break the url, we leave it alone
pass
else :
# for domains like mediaserver4.website.com, where multiple subdomains serve the same content as the larger site
if not ClientNetworkingFunctions . DomainEqualsAnotherForgivingWWW ( netloc , self . _netloc ) :
netloc = self . _netloc
return netloc
2024-04-03 21:15:48 +00:00
def _ClipAndFleshOutPath ( self , path_components : typing . List [ str ] , for_server : bool ) :
2022-01-05 22:15:56 +00:00
# /post/show/1326143/akunim-anthro-armband-armwear-clothed-clothing-fem
2024-03-27 22:07:03 +00:00
do_clip = self . UsesAPIURL ( ) or not for_server
2024-03-27 21:47:50 +00:00
flesh_out = len ( path_components ) < len ( self . _path_components )
if do_clip or flesh_out :
2022-01-05 22:15:56 +00:00
clipped_path_components = [ ]
for ( index , ( string_match , default ) ) in enumerate ( self . _path_components ) :
if len ( path_components ) > index : # the given path has the value
clipped_path_component = path_components [ index ]
elif default is not None :
clipped_path_component = default
else :
raise HydrusExceptions . URLClassException ( ' Could not clip path--given url appeared to be too short! ' )
clipped_path_components . append ( clipped_path_component )
2024-04-03 21:15:48 +00:00
path_components = clipped_path_components
2022-01-05 22:15:56 +00:00
2024-04-03 21:15:48 +00:00
path = ' / ' + ' / ' . join ( path_components )
2022-01-05 22:15:56 +00:00
# /post/show/1326143
return path
2024-04-03 21:15:48 +00:00
def _ClipAndFleshOutQuery(self, query_dict: typing.Dict[str, str], single_value_parameters: typing.List[str], param_order: typing.List[str], for_server: bool):
    """Build the query text for a url of this class, adding defaults and dropping unwanted parameters.

    Steps:
    1. Every parameter of this class is paired with the first not-yet-claimed
       query name it matches. A parameter with no match is filled in from its
       default value (processed); note this writes into the caller's
       query_dict and appends to the caller's param_order.
    2. Query names that no parameter claimed are kept only when the result is
       for the server and extra parameters are kept for the server.
    3. Ephemeral parameters are kept only when the result is for the server.
    4. param_order is discarded when parameters are alphabetised, and
       single-value parameters are discarded unless the class has them or
       extras are being kept for the server.

    Returns whatever ClientNetworkingFunctions.ConvertQueryDictToText produces.

    Raises HydrusExceptions.URLClassException when an unmatched parameter has a
    default but no fixed name, or has no default and is not ephemeral.
    """
    query_dict_keys_to_parameters = {}

    remaining_query_dict_names = set(query_dict.keys())

    # if we were feeling clever, we could sort these guys from most specific name to least, but w/e
    for parameter in self._parameters:
        match_found = False

        for name in remaining_query_dict_names:
            if parameter.MatchesName(name):
                query_dict_keys_to_parameters[name] = parameter

                # safe to mutate the set here because we stop iterating immediately
                remaining_query_dict_names.discard(name)

                match_found = True

                break

        if match_found:
            continue

        if parameter.HasDefaultValue():
            if not isinstance(parameter, URLClassParameterFixedName):
                raise HydrusExceptions.URLClassException(f' Could not flesh out query--cannot figure out a fixed name for {parameter} ! ')

            name = parameter.GetName()

            query_dict_keys_to_parameters[name] = parameter

            query_dict[name] = parameter.GetDefaultValue(with_processing=True)

            param_order.append(name)
        elif not parameter.IsEphemeralToken():
            # Report the parameter that is actually missing. The search loop's 'name' variable
            # is either an unrelated query key or, for an empty query, not bound at all.
            if isinstance(parameter, URLClassParameterFixedName):
                missing_name = parameter.GetName()
            else:
                missing_name = repr(parameter)

            raise HydrusExceptions.URLClassException(f' Could not flesh out query--no default for {missing_name} defined! ')

    for name in remaining_query_dict_names:
        # None marks a query name that no parameter of this class claimed
        query_dict_keys_to_parameters[name] = None

    # ok, we now have our fully fleshed out query_dict. let's filter it
    filtered_query_dict = {}

    for (name, possible_parameter) in query_dict_keys_to_parameters.items():
        if possible_parameter is None:
            if not (for_server and self._keep_extra_parameters_for_server):
                # no matching param, discard it
                continue
        else:
            if possible_parameter.IsEphemeralToken() and not for_server:
                continue

        filtered_query_dict[name] = query_dict[name]

    query_dict = filtered_query_dict

    if self._alphabetise_get_parameters:
        param_order = None

    we_want_single_value_params = self._has_single_value_parameters or (for_server and self._keep_extra_parameters_for_server)

    if not we_want_single_value_params:
        single_value_parameters = []

    query = ClientNetworkingFunctions.ConvertQueryDictToText(query_dict, single_value_parameters, param_order=param_order)

    return query
def _GetSerialisableInfo ( self ) :
serialisable_url_class_key = self . _url_class_key . hex ( )
serialisable_path_components = [ ( string_match . GetSerialisableTuple ( ) , default ) for ( string_match , default ) in self . _path_components ]
2024-03-20 21:10:16 +00:00
serialisable_parameters = self . _parameters . GetSerialisableTuple ( )
2022-01-05 22:15:56 +00:00
serialisable_single_value_parameters_string_match = self . _single_value_parameters_string_match . GetSerialisableTuple ( )
serialisable_header_overrides = list ( self . _header_overrides . items ( ) )
serialisable_api_lookup_converter = self . _api_lookup_converter . GetSerialisableTuple ( )
serialisable_referral_url_converter = self . _referral_url_converter . GetSerialisableTuple ( )
2024-03-27 21:47:50 +00:00
booleans = ( self . _match_subdomains , self . _keep_matched_subdomains , self . _alphabetise_get_parameters , self . _no_more_path_components_than_this , self . _no_more_parameters_than_this , self . _keep_extra_parameters_for_server , self . _can_produce_multiple_files , self . _should_be_associated_with_files , self . _keep_fragment )
2022-01-05 22:15:56 +00:00
return (
serialisable_url_class_key ,
self . _url_type ,
self . _preferred_scheme ,
self . _netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
self . _has_single_value_parameters ,
serialisable_single_value_parameters_string_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
self . _send_referral_url ,
serialisable_referral_url_converter ,
self . _gallery_index_type ,
self . _gallery_index_identifier ,
self . _gallery_index_delta ,
self . _example_url
)
def _InitialiseFromSerialisableInfo(self, serialisable_info):
    """Rebuild this url class from the tuple written by _GetSerialisableInfo.

    Plain values are stored as they come, the nine boolean flags are unpacked
    from their nested tuple, the key is decoded from hex, header overrides are
    turned back into a dict, and every sub-object is recreated through
    HydrusSerialisable.CreateFromSerialisableTuple.
    """
    (
        url_class_key_hex,
        self._url_type,
        self._preferred_scheme,
        self._netloc,
        flags,
        path_component_pairs,
        parameters_tuple,
        self._has_single_value_parameters,
        single_value_parameters_string_match_tuple,
        header_override_pairs,
        api_lookup_converter_tuple,
        self._send_referral_url,
        referral_url_converter_tuple,
        self._gallery_index_type,
        self._gallery_index_identifier,
        self._gallery_index_delta,
        self._example_url
    ) = serialisable_info

    (
        self._match_subdomains,
        self._keep_matched_subdomains,
        self._alphabetise_get_parameters,
        self._no_more_path_components_than_this,
        self._no_more_parameters_than_this,
        self._keep_extra_parameters_for_server,
        self._can_produce_multiple_files,
        self._should_be_associated_with_files,
        self._keep_fragment
    ) = flags

    self._url_class_key = bytes.fromhex(url_class_key_hex)

    restored_path_components = []

    for (string_match_tuple, default) in path_component_pairs:
        restored_path_components.append((HydrusSerialisable.CreateFromSerialisableTuple(string_match_tuple), default))

    self._path_components = restored_path_components

    self._parameters = HydrusSerialisable.CreateFromSerialisableTuple(parameters_tuple)
    self._single_value_parameters_string_match = HydrusSerialisable.CreateFromSerialisableTuple(single_value_parameters_string_match_tuple)
    self._header_overrides = dict(header_override_pairs)
    self._api_lookup_converter = HydrusSerialisable.CreateFromSerialisableTuple(api_lookup_converter_tuple)
    self._referral_url_converter = HydrusSerialisable.CreateFromSerialisableTuple(referral_url_converter_tuple)

    # the stored flag is overridden when parameters are capped or the api lookup converter makes changes
    parameters_are_capped = self._no_more_parameters_than_this

    if parameters_are_capped or self._api_lookup_converter.MakesChanges():
        self._keep_extra_parameters_for_server = False
2022-01-05 22:15:56 +00:00
def _UpdateSerialisableInfo ( self , version , old_serialisable_info ) :
if version == 1 :
( url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , example_url ) = old_serialisable_info
url_class_key = HydrusData . GenerateKey ( )
serialisable_url_class_key = url_class_key . hex ( )
api_lookup_converter = ClientStrings . StringConverter ( example_string = example_url )
serialisable_api_lookup_converter = api_lookup_converter . GetSerialisableTuple ( )
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , example_url )
return ( 2 , new_serialisable_info )
if version == 2 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , example_url ) = old_serialisable_info
if url_type in ( HC . URL_TYPE_FILE , HC . URL_TYPE_POST ) :
should_be_associated_with_files = True
else :
should_be_associated_with_files = False
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , should_be_associated_with_files , example_url )
return ( 3 , new_serialisable_info )
if version == 3 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , should_be_associated_with_files , example_url ) = old_serialisable_info
can_produce_multiple_files = False
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , can_produce_multiple_files , should_be_associated_with_files , example_url )
return ( 4 , new_serialisable_info )
if version == 4 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , can_produce_multiple_files , should_be_associated_with_files , example_url ) = old_serialisable_info
gallery_index_type = None
gallery_index_identifier = None
gallery_index_delta = 1
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url )
return ( 5 , new_serialisable_info )
if version == 5 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url ) = old_serialisable_info
path_components = HydrusSerialisable . CreateFromSerialisableTuple ( serialisable_path_components )
parameters = HydrusSerialisable . CreateFromSerialisableTuple ( serialisable_parameters )
path_components = [ ( value , None ) for value in path_components ]
parameters = { key : ( value , None ) for ( key , value ) in list ( parameters . items ( ) ) }
serialisable_path_components = [ ( string_match . GetSerialisableTuple ( ) , default ) for ( string_match , default ) in path_components ]
serialisable_parameters = [ ( key , ( string_match . GetSerialisableTuple ( ) , default ) ) for ( key , ( string_match , default ) ) in list ( parameters . items ( ) ) ]
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url )
return ( 6 , new_serialisable_info )
if version == 6 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url ) = old_serialisable_info
send_referral_url = SEND_REFERRAL_URL_ONLY_IF_PROVIDED
referral_url_converter = ClientStrings . StringConverter ( example_string = ' https://hostname.com/post/page.php?id=123456&s=view ' )
serialisable_referrel_url_converter = referral_url_converter . GetSerialisableTuple ( )
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url )
return ( 7 , new_serialisable_info )
if version == 7 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url ) = old_serialisable_info
alphabetise_get_parameters = True
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , alphabetise_get_parameters , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url )
return ( 8 , new_serialisable_info )
if version == 8 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , match_subdomains , keep_matched_subdomains , alphabetise_get_parameters , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , can_produce_multiple_files , should_be_associated_with_files , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url ) = old_serialisable_info
keep_fragment = False
booleans = ( match_subdomains , keep_matched_subdomains , alphabetise_get_parameters , can_produce_multiple_files , should_be_associated_with_files , keep_fragment )
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , booleans , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url )
return ( 9 , new_serialisable_info )
if version == 9 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , booleans , serialisable_path_components , serialisable_parameters , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url ) = old_serialisable_info
header_overrides = { }
serialisable_header_overrides = list ( header_overrides . items ( ) )
new_serialisable_info = ( serialisable_url_class_key , url_type , preferred_scheme , netloc , booleans , serialisable_path_components , serialisable_parameters , serialisable_header_overrides , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url )
return ( 10 , new_serialisable_info )
if version == 10 :
( serialisable_url_class_key , url_type , preferred_scheme , netloc , booleans , serialisable_path_components , serialisable_parameters , serialisable_header_overrides , serialisable_api_lookup_converter , send_referral_url , serialisable_referrel_url_converter , gallery_index_type , gallery_index_identifier , gallery_index_delta , example_url ) = old_serialisable_info
has_single_value_parameters = False
single_value_parameters_string_match = ClientStrings . StringMatch ( )
serialisable_single_value_parameters_match = single_value_parameters_string_match . GetSerialisableTuple ( )
new_serialisable_info = (
serialisable_url_class_key ,
url_type ,
preferred_scheme ,
netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
has_single_value_parameters ,
serialisable_single_value_parameters_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
send_referral_url ,
serialisable_referrel_url_converter ,
gallery_index_type ,
gallery_index_identifier ,
gallery_index_delta ,
example_url
)
return ( 11 , new_serialisable_info )
2022-11-30 22:06:58 +00:00
if version == 11 :
(
serialisable_url_class_key ,
url_type ,
preferred_scheme ,
netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
has_single_value_parameters ,
serialisable_single_value_parameters_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
send_referral_url ,
serialisable_referrel_url_converter ,
gallery_index_type ,
gallery_index_identifier ,
gallery_index_delta ,
example_url
) = old_serialisable_info
( match_subdomains , keep_matched_subdomains , alphabetise_get_parameters , can_produce_multiple_files , should_be_associated_with_files , keep_fragment ) = booleans
no_more_path_components_than_this = False
no_more_parameters_than_this = False
booleans = ( match_subdomains , keep_matched_subdomains , alphabetise_get_parameters , no_more_path_components_than_this , no_more_parameters_than_this , can_produce_multiple_files , should_be_associated_with_files , keep_fragment )
new_serialisable_info = (
serialisable_url_class_key ,
url_type ,
preferred_scheme ,
netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
has_single_value_parameters ,
serialisable_single_value_parameters_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
send_referral_url ,
serialisable_referrel_url_converter ,
gallery_index_type ,
gallery_index_identifier ,
gallery_index_delta ,
example_url
)
return ( 12 , new_serialisable_info )
2024-03-20 21:10:16 +00:00
if version == 12 :
(
serialisable_url_class_key ,
url_type ,
preferred_scheme ,
netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
has_single_value_parameters ,
serialisable_single_value_parameters_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
send_referral_url ,
serialisable_referrel_url_converter ,
gallery_index_type ,
gallery_index_identifier ,
gallery_index_delta ,
example_url
) = old_serialisable_info
2024-03-27 21:47:50 +00:00
def encode_fixed_string_match ( s_m : ClientStrings . StringMatch ) - > ClientStrings . StringMatch :
( match_type , match_value , min_chars , max_chars , example_string ) = s_m . ToTuple ( )
if match_type == ClientStrings . STRING_MATCH_FIXED :
match_value = urllib . parse . quote ( match_value )
example_string = urllib . parse . quote ( example_string )
s_m = ClientStrings . StringMatch (
match_type = match_type ,
match_value = match_value ,
min_chars = min_chars ,
max_chars = max_chars ,
example_string = example_string
)
return s_m
2024-03-20 21:10:16 +00:00
new_parameters = HydrusSerialisable . SerialisableList ( )
for ( name , ( serialisable_value_string_match , default_value ) ) in serialisable_parameters :
2024-03-27 21:47:50 +00:00
# we are converting from post[id] to post%5Bid%5D
name = urllib . parse . quote ( name )
2024-03-20 21:10:16 +00:00
value_string_match = HydrusSerialisable . CreateFromSerialisableTuple ( serialisable_value_string_match )
2024-03-27 21:47:50 +00:00
value_string_match = encode_fixed_string_match ( value_string_match )
2024-03-20 21:10:16 +00:00
parameter = URLClassParameterFixedName (
name = name ,
2024-03-27 21:47:50 +00:00
value_string_match = value_string_match
2024-03-20 21:10:16 +00:00
)
2024-03-27 21:47:50 +00:00
if default_value is not None :
default_value = urllib . parse . quote ( default_value )
parameter . SetDefaultValue ( default_value )
2024-03-20 21:10:16 +00:00
new_parameters . append ( parameter )
serialisable_parameters = new_parameters . GetSerialisableTuple ( )
2024-03-27 21:47:50 +00:00
path_components = [ ( HydrusSerialisable . CreateFromSerialisableTuple ( serialisable_string_match ) , default ) for ( serialisable_string_match , default ) in serialisable_path_components ]
new_path_components = [ ]
for ( string_match , default ) in path_components :
string_match = encode_fixed_string_match ( string_match )
if default is not None :
default = urllib . parse . quote ( default )
new_path_components . append ( ( string_match , default ) )
serialisable_path_components = [ ( string_match . GetSerialisableTuple ( ) , default ) for ( string_match , default ) in new_path_components ]
2024-03-20 21:10:16 +00:00
new_serialisable_info = (
serialisable_url_class_key ,
url_type ,
preferred_scheme ,
netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
has_single_value_parameters ,
serialisable_single_value_parameters_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
send_referral_url ,
serialisable_referrel_url_converter ,
gallery_index_type ,
gallery_index_identifier ,
gallery_index_delta ,
example_url
)
return ( 13 , new_serialisable_info )
2024-03-27 21:47:50 +00:00
if version == 13 :
(
serialisable_url_class_key ,
url_type ,
preferred_scheme ,
netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
has_single_value_parameters ,
serialisable_single_value_parameters_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
send_referral_url ,
serialisable_referrel_url_converter ,
gallery_index_type ,
gallery_index_identifier ,
gallery_index_delta ,
example_url
) = old_serialisable_info
( match_subdomains , keep_matched_subdomains , alphabetise_get_parameters , no_more_path_components_than_this , no_more_parameters_than_this , can_produce_multiple_files , should_be_associated_with_files , keep_fragment ) = booleans
api_lookup_converter = HydrusSerialisable . CreateFromSerialisableTuple ( serialisable_api_lookup_converter )
keep_extra_parameters_for_server = True
if no_more_parameters_than_this or api_lookup_converter . MakesChanges ( ) or url_type not in ( HC . URL_TYPE_GALLERY , HC . URL_TYPE_WATCHABLE ) :
keep_extra_parameters_for_server = False
booleans = ( match_subdomains , keep_matched_subdomains , alphabetise_get_parameters , no_more_path_components_than_this , no_more_parameters_than_this , keep_extra_parameters_for_server , can_produce_multiple_files , should_be_associated_with_files , keep_fragment )
new_serialisable_info = (
serialisable_url_class_key ,
url_type ,
preferred_scheme ,
netloc ,
booleans ,
serialisable_path_components ,
serialisable_parameters ,
has_single_value_parameters ,
serialisable_single_value_parameters_match ,
serialisable_header_overrides ,
serialisable_api_lookup_converter ,
send_referral_url ,
serialisable_referrel_url_converter ,
gallery_index_type ,
gallery_index_identifier ,
gallery_index_delta ,
example_url
)
return ( 14 , new_serialisable_info )
2022-01-05 22:15:56 +00:00
def AlphabetiseGetParameters(self):
    """Return the stored flag saying whether GET parameters are alphabetised."""
    should_alphabetise = self._alphabetise_get_parameters

    return should_alphabetise
def CanGenerateNextGalleryPage(self):
    """Report whether a next-gallery-page URL can be derived from this class.

    True only when the URL type is HC.URL_TYPE_GALLERY and a gallery index
    type has been set (is not None).
    """
    # guard clause: only gallery URL classes can page forward
    if self._url_type != HC.URL_TYPE_GALLERY:
        return False

    return self._gallery_index_type is not None
def CanReferToMultipleFiles(self):
    """Report whether a URL of this class may refer to more than one file.

    Gallery and watchable URL types always qualify; post URL types qualify
    only when the can-produce-multiple-files flag is set.
    """
    multi_file_page_types = (HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE)

    if self._url_type in multi_file_page_types:
        return True

    # otherwise only a post page flagged as multi-file counts
    return self._url_type == HC.URL_TYPE_POST and self._can_produce_multiple_files
2024-03-20 21:10:16 +00:00
def GetAPILookupConverter(self):
    """Return the converter object stored as this class's API lookup converter."""
    converter = self._api_lookup_converter

    return converter
2024-04-03 21:15:48 +00:00
def GetAPIURL(self, url):
    """Return the API form of ``url``.

    The url is first normalised with ``for_server = True`` and the result is
    then passed through the API lookup converter.
    """
    server_ready_url = self.Normalise(url, for_server=True)

    api_url = self._api_lookup_converter.Convert(server_ready_url)

    return api_url
2022-01-05 22:15:56 +00:00
def GetClassKey(self):
    """Return the key stored for this URL class."""
    class_key = self._url_class_key

    return class_key
def GetDomain(self):
    """Return the netloc stored for this URL class."""
    domain = self._netloc

    return domain
def GetExampleURL(self):
    """Return the example URL stored for this URL class."""
    example = self._example_url

    return example
def GetGalleryIndexValues(self):
    """Return ``(gallery_index_type, gallery_index_identifier, gallery_index_delta)``."""
    index_type = self._gallery_index_type
    index_identifier = self._gallery_index_identifier
    index_delta = self._gallery_index_delta

    return (index_type, index_identifier, index_delta)
def GetHeaderOverrides(self):
    """Return the stored header overrides object (the same object, not a copy)."""
    overrides = self._header_overrides

    return overrides
2024-03-20 21:10:16 +00:00
def GetNetloc(self):
    """Return the netloc stored for this URL class."""
    netloc = self._netloc

    return netloc
2022-01-05 22:15:56 +00:00
def GetNextGalleryPage(self, url):
    """Return the URL of the gallery page that follows ``url``.

    The url is normalised with ``for_server = True`` and parsed. Depending on
    the gallery index type, either the path component at the configured index
    or the query parameter with the configured name is read as an integer,
    increased by the gallery index delta, and written back. The URL is then
    reassembled from the parsed pieces.

    Raises:
        HydrusExceptions.URLClassException: the indexed path component or the
            named parameter is missing, or its value is not an integer.
        NotImplementedError: the gallery index type is neither of the two
            known GALLERY_INDEX_TYPE_* values.
    """
    url = self.Normalise(url, for_server=True)

    p = ClientNetworkingFunctions.ParseURL(url)

    scheme = p.scheme
    netloc = p.netloc
    path = p.path
    query = p.query
    params = ' '
    fragment = p.fragment

    if self._gallery_index_type == GALLERY_INDEX_TYPE_PATH_COMPONENT:

        page_index_path_component_index = self._gallery_index_identifier

        path_components = ClientNetworkingFunctions.ConvertPathTextToList(path)

        try:
            page_index = path_components[page_index_path_component_index]
        except IndexError:
            raise HydrusExceptions.URLClassException(' Could not generate next gallery page--not enough path components! ')

        # only a failed integer conversion is expected here; a bare except
        # would also swallow KeyboardInterrupt and SystemExit
        try:
            page_index = int(page_index)
        except (TypeError, ValueError):
            raise HydrusExceptions.URLClassException(' Could not generate next gallery page--index component was not an integer! ')

        path_components[page_index_path_component_index] = str(page_index + self._gallery_index_delta)

        path = ' / ' + ' / '.join(path_components)

    elif self._gallery_index_type == GALLERY_INDEX_TYPE_PARAMETER:

        page_index_name = self._gallery_index_identifier

        (query_dict, single_value_parameters, param_order) = ClientNetworkingFunctions.ConvertQueryTextToDict(query)

        if page_index_name not in query_dict:
            raise HydrusExceptions.URLClassException(' Could not generate next gallery page--did not find ' + str(self._gallery_index_identifier) + ' in parameters! ')

        page_index = query_dict[page_index_name]

        try:
            page_index = int(page_index)
        except (TypeError, ValueError):
            raise HydrusExceptions.URLClassException(' Could not generate next gallery page--index component was not an integer! ')

        query_dict[page_index_name] = page_index + self._gallery_index_delta

        # when alphabetising, the original parameter order is not passed on
        if self._alphabetise_get_parameters:
            param_order = None

        # single-value parameters are dropped unless this class expects them
        if not self._has_single_value_parameters:
            single_value_parameters = []

        query = ClientNetworkingFunctions.ConvertQueryDictToText(query_dict, single_value_parameters, param_order=param_order)

    else:

        raise NotImplementedError(' Did not understand the next gallery page rules! ')

    r = urllib.parse.ParseResult(scheme, netloc, path, params, query, fragment)

    return r.geturl()
2024-03-20 21:10:16 +00:00
def GetParameters(self) -> typing.List[URLClassParameterFixedName]:
    """Return the stored list of parameter objects (the same list, not a copy)."""
    parameters = self._parameters

    return parameters
def GetPathComponents(self):
    """Return the stored path components (the same object, not a copy)."""
    components = self._path_components

    return components
def GetPreferredScheme(self):
    """Return the preferred scheme stored for this URL class."""
    scheme = self._preferred_scheme

    return scheme
2022-01-05 22:15:56 +00:00
def GetReferralURL(self, url, referral_url):
    """Pick the referral URL to send for ``url`` according to the referral policy.

    - SEND_REFERRAL_URL_NEVER: always None.
    - SEND_REFERRAL_URL_ONLY_CONVERTER: the converter's output.
    - SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED: the converter's output
      only when ``referral_url`` is None.
    - Anything else (including SEND_REFERRAL_URL_ONLY_IF_PROVIDED): the given
      ``referral_url`` unchanged.

    If the converter raises StringConvertException, the given ``referral_url``
    is returned.
    """
    policy = self._send_referral_url

    if policy == SEND_REFERRAL_URL_NEVER:
        return None

    converter_policies = (SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED, SEND_REFERRAL_URL_ONLY_CONVERTER)

    if policy not in converter_policies:
        # "only if provided" and any unrecognised policy pass the input through
        return referral_url

    # the converter works on the server-facing form of the url
    request_url = self.Normalise(url, for_server=True)

    try:
        converted_referral_url = self._referral_url_converter.Convert(request_url)
    except HydrusExceptions.StringConvertException:
        return referral_url

    always_convert = policy == SEND_REFERRAL_URL_ONLY_CONVERTER
    convert_as_fallback = policy == SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED and referral_url is None

    return converted_referral_url if (always_convert or convert_as_fallback) else referral_url
2024-03-20 21:10:16 +00:00
def GetReferralURLInfo(self):
    """Return ``(send_referral_url, referral_url_converter)`` as stored on this class."""
    policy = self._send_referral_url
    converter = self._referral_url_converter

    return (policy, converter)
2022-01-05 22:15:56 +00:00
def GetSafeSummary(self):
    """Return a short text summary built from this class's name and the domain of its example URL."""
    prefix = ' URL Class " ' + self._name + ' " - '

    example_domain = ClientNetworkingFunctions.ConvertURLIntoDomain(self.GetExampleURL())

    return prefix + example_domain
def GetSingleValueParameterData(self):
    """Return ``(has_single_value_parameters, single_value_parameters_string_match)``."""
    has_them = self._has_single_value_parameters
    string_match = self._single_value_parameters_string_match

    return (has_them, string_match)
def GetSortingComplexityKey(self):
    """Return a tuple used to order URL classes by complexity.

    The tuple is ``(required path components, total path components,
    required parameters, total parameters, length of the example URL)``.
    A path component counts as required when its default is None; a parameter
    counts as required when it reports no default value. The example URL
    length is measured on its server-normalised form when that can be
    produced, otherwise on the raw stored example URL.
    """
    # we sort url classes so that
    # site.com/post/123456
    # comes before
    # site.com/search?query=blah

    # I used to do gallery first, then post, then file, but it ultimately was unhelpful in some situations and better handled by strict component/parameter matching

    num_required_path_components = sum(1 for (string_match, default) in self._path_components if default is None)
    num_total_path_components = len(self._path_components)

    num_required_parameters = sum(1 for parameter in self._parameters if not parameter.HasDefaultValue())
    num_total_parameters = len(self._parameters)

    try:
        len_example_url = len(self.Normalise(self._example_url, for_server=True))
    except Exception:
        # best effort: an example URL that cannot be normalised must not break sorting,
        # but KeyboardInterrupt/SystemExit should still propagate
        len_example_url = len(self._example_url)

    return (num_required_path_components, num_total_path_components, num_required_parameters, num_total_parameters, len_example_url)
2022-01-05 22:15:56 +00:00
def GetURLBooleans(self):
    """Return six stored flags as a tuple.

    Order: match_subdomains, keep_matched_subdomains,
    alphabetise_get_parameters, can_produce_multiple_files,
    should_be_associated_with_files, keep_fragment.
    """
    flags = (
        self._match_subdomains,
        self._keep_matched_subdomains,
        self._alphabetise_get_parameters,
        self._can_produce_multiple_files,
        self._should_be_associated_with_files,
        self._keep_fragment,
    )

    return flags
def GetURLType(self):
    """Return the URL type value stored for this URL class."""
    url_type = self._url_type

    return url_type
def IsGalleryURL(self):
    """Report whether the stored URL type equals HC.URL_TYPE_GALLERY."""
    is_gallery = self._url_type == HC.URL_TYPE_GALLERY

    return is_gallery
def IsParsable(self):
    """Report whether the stored URL type is one of post, gallery or watchable."""
    parsable_types = (HC.URL_TYPE_POST, HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE)

    return self._url_type in parsable_types
def IsPostURL(self):
    """Report whether the stored URL type equals HC.URL_TYPE_POST."""
    is_post = self._url_type == HC.URL_TYPE_POST

    return is_post
def IsWatchableURL(self):
    """Report whether the stored URL type equals HC.URL_TYPE_WATCHABLE."""
    is_watchable = self._url_type == HC.URL_TYPE_WATCHABLE

    return is_watchable
2024-03-27 21:47:50 +00:00
def KeepExtraParametersForServer(self):
    """Return the stored keep-extra-parameters-for-server flag."""
    keep_extras = self._keep_extra_parameters_for_server

    return keep_extras
2022-01-05 22:15:56 +00:00
def Matches(self, url):
    """Return True when ``self.Test(url)`` passes, False when it raises URLClassException.

    Any other exception raised by ``Test`` propagates to the caller.
    """
    try:
        self.Test(url)
    except HydrusExceptions.URLClassException:
        return False
    else:
        return True
def MatchesSubdomains(self):
    """Return the stored match-subdomains flag."""
    matches_subdomains = self._match_subdomains

    return matches_subdomains
2024-03-27 21:47:50 +00:00
def Normalise(self, url, for_server=False):
    """Rebuild ``url`` in this class's normalised form and return it as text.

    The result uses the preferred scheme, the netloc from ``_ClipNetLoc``,
    and the path and query produced by ``_ClipAndFleshOutPath`` and
    ``_ClipAndFleshOutQuery``. The original fragment is carried over only
    when the keep-fragment flag is set. ``for_server`` is handed straight to
    the path and query helpers.
    """
    parsed = ClientNetworkingFunctions.ParseURL(url)

    fragment = parsed.fragment if self._keep_fragment else ' '

    # break the incoming path and query into their parts
    incoming_path_components = ClientNetworkingFunctions.ConvertPathTextToList(parsed.path)
    (incoming_query_dict, incoming_single_values, incoming_param_order) = ClientNetworkingFunctions.ConvertQueryTextToDict(parsed.query)

    # rebuild each piece according to this class's rules
    new_netloc = self._ClipNetLoc(parsed.netloc)
    new_path = self._ClipAndFleshOutPath(incoming_path_components, for_server)
    new_query = self._ClipAndFleshOutQuery(incoming_query_dict, incoming_single_values, incoming_param_order, for_server)

    rebuilt = urllib.parse.ParseResult(self._preferred_scheme, new_netloc, new_path, ' ', new_query, fragment)

    return rebuilt.geturl()
2022-11-30 22:06:58 +00:00
def NoMorePathComponentsThanThis(self) -> bool:
    """Return the stored flag that forbids path components beyond the defined ones."""
    no_more = self._no_more_path_components_than_this

    return no_more
def NoMoreParametersThanThis(self) -> bool:
    """Return the stored flag that forbids parameters beyond the defined ones."""
    no_more = self._no_more_parameters_than_this

    return no_more
2022-01-05 22:15:56 +00:00
def RefersToOneFile(self):
    """Report whether a URL of this class refers to exactly one file.

    True for the file URL type, and for the post URL type when the
    can-produce-multiple-files flag is not set.
    """
    if self._url_type == HC.URL_TYPE_FILE:
        return True

    # a post page counts only when it is not flagged as multi-file
    return self._url_type == HC.URL_TYPE_POST and not self._can_produce_multiple_files
def RegenerateClassKey(self):
    """Replace this class's key with a fresh one from ``HydrusData.GenerateKey()``."""
    fresh_key = HydrusData.GenerateKey()

    self._url_class_key = fresh_key
def SetAlphabetiseGetParameters(self, alphabetise_get_parameters: bool):
    """Store the alphabetise-GET-parameters flag."""
    new_value = alphabetise_get_parameters

    self._alphabetise_get_parameters = new_value
def SetClassKey(self, match_key):
    """Store ``match_key`` as this URL class's key."""
    new_key = match_key

    self._url_class_key = new_key
def SetExampleURL(self, example_url):
    """Store ``example_url`` as this URL class's example URL."""
    new_example = example_url

    self._example_url = new_example
2024-03-27 21:47:50 +00:00
def SetKeepExtraParametersForServer(self, value):
    """Store the keep-extra-parameters-for-server flag."""
    new_value = value

    self._keep_extra_parameters_for_server = new_value
2022-11-30 22:06:58 +00:00
def SetNoMorePathComponentsThanThis(self, no_more: bool):
    """Store the flag that forbids path components beyond the defined ones."""
    new_value = no_more

    self._no_more_path_components_than_this = new_value
def SetNoMoreParametersThanThis(self, no_more: bool):
    """Store the flag that forbids parameters beyond the defined ones."""
    new_value = no_more

    self._no_more_parameters_than_this = new_value
2022-01-05 22:15:56 +00:00
def SetSingleValueParameterData(self, has_single_value_parameters: bool, single_value_parameters_string_match: ClientStrings.StringMatch):
    """Store the single-value-parameter flag together with the string match used to test those parameters."""
    (self._has_single_value_parameters, self._single_value_parameters_string_match) = (
        has_single_value_parameters,
        single_value_parameters_string_match,
    )
def SetURLBooleans(
    self,
    match_subdomains: bool,
    keep_matched_subdomains: bool,
    alphabetise_get_parameters: bool,
    can_produce_multiple_files: bool,
    should_be_associated_with_files: bool,
    keep_fragment: bool
):
    """Store six flags on this URL class in one call; each argument is written to the attribute of the same name."""
    # subdomain handling
    (self._match_subdomains, self._keep_matched_subdomains) = (match_subdomains, keep_matched_subdomains)

    # query and fragment handling
    (self._alphabetise_get_parameters, self._keep_fragment) = (alphabetise_get_parameters, keep_fragment)

    # file relationship
    (self._can_produce_multiple_files, self._should_be_associated_with_files) = (can_produce_multiple_files, should_be_associated_with_files)
def ShouldAssociateWithFiles(self):
    """Return the stored should-be-associated-with-files flag."""
    should_associate = self._should_be_associated_with_files

    return should_associate
def Test(self, url):
    """Check ``url`` against this URL class, raising if it does not fit.

    Checks run in this order: netloc, path components, named parameters,
    then single-value parameters. Returns None when every check passes.

    Raises:
        HydrusExceptions.URLClassException: on the first check that fails.
            String match failures are re-raised as this type with the
            original message text.
    """
    p = ClientNetworkingFunctions.ParseURL(url)

    # --- netloc ---

    if self._match_subdomains:

        # accept the exact netloc or anything ending with the separator plus netloc
        if p.netloc != self._netloc and not p.netloc.endswith(' . ' + self._netloc):
            raise HydrusExceptions.URLClassException(p.netloc + ' (potentially excluding subdomains) did not match ' + self._netloc)

    else:

        if not ClientNetworkingFunctions.DomainEqualsAnotherForgivingWWW(p.netloc, self._netloc):
            raise HydrusExceptions.URLClassException(p.netloc + ' did not match ' + self._netloc)

    path = p.path
    query = p.query

    path_components = ClientNetworkingFunctions.ConvertPathTextToList(path)
    (query_dict, single_value_parameters, param_order) = ClientNetworkingFunctions.ConvertQueryTextToDict(query)

    # --- path components ---

    if self._no_more_path_components_than_this:

        if len(path_components) > len(self._path_components):
            raise HydrusExceptions.URLClassException(' " {} " has {} path components, but I will not allow more than my defined {} ! '.format(path, len(path_components), len(self._path_components)))

    for (index, (string_match, default)) in enumerate(self._path_components):

        if len(path_components) > index:

            path_component = path_components[index]

            try:
                string_match.Test(path_component)
            except HydrusExceptions.StringMatchException as e:
                raise HydrusExceptions.URLClassException(str(e))

        elif default is None:

            # the url ran out of components at a position that has no default
            if index + 1 == len(self._path_components):
                message = ' " {} " has {} path components, but I was expecting {} ! '.format(path, len(path_components), len(self._path_components))
            else:
                message = ' " {} " has {} path components, but I was expecting at least {} and maybe as many as {} ! '.format(path, len(path_components), index + 1, len(self._path_components))

            raise HydrusExceptions.URLClassException(message)

    # --- named parameters ---

    if self._no_more_parameters_than_this:

        good_fixed_names = {parameter.GetName() for parameter in self._parameters if isinstance(parameter, URLClassParameterFixedName)}

        # only the names matter here, so iterate the keys directly
        for name in query_dict:

            if name not in good_fixed_names:
                # message fixed: it used to open with a stray, never-closed double quote
                raise HydrusExceptions.URLClassException(f' This has a " { name } " parameter, but I am set to not allow any unexpected parameters! ')

    for parameter in self._parameters:

        if isinstance(parameter, URLClassParameterFixedName):

            name = parameter.GetName()

            if name not in query_dict:

                if parameter.MustBeInOriginalURL():
                    raise HydrusExceptions.URLClassException(f' { name } not found in { p . query } ')
                else:
                    continue

            value = query_dict[name]

            try:
                parameter.TestValue(value)
            except HydrusExceptions.StringMatchException as e:
                raise HydrusExceptions.URLClassException(f' Problem with { name } : ' + str(e))

    # --- single-value parameters ---

    if len(single_value_parameters) > 0 and not self._has_single_value_parameters and self._no_more_parameters_than_this:
        raise HydrusExceptions.URLClassException(' " {} " has unexpected single-value parameters, but I am set to not allow any unexpected parameters! '.format(query))

    if self._has_single_value_parameters:

        if len(single_value_parameters) == 0:
            raise HydrusExceptions.URLClassException(' Was expecting single-value parameter(s), but this URL did not seem to have any. ')

        for single_value_parameter in single_value_parameters:

            try:
                self._single_value_parameters_string_match.Test(single_value_parameter)
            except HydrusExceptions.StringMatchException as e:
                raise HydrusExceptions.URLClassException(str(e))
def UsesAPIURL(self):
    """Report whether the API lookup converter says it makes changes."""
    converter = self._api_lookup_converter

    return converter.MakesChanges()
# Register URLClass in HydrusSerialisable's type-to-object-type lookup under SERIALISABLE_TYPE_URL_CLASS.
# NOTE(review): presumably this is how serialised URL classes get reconstructed on load -- confirm in HydrusSerialisable.
HydrusSerialisable . SERIALISABLE_TYPES_TO_OBJECT_TYPES [ HydrusSerialisable . SERIALISABLE_TYPE_URL_CLASS ] = URLClass