1577 lines
61 KiB
Python
1577 lines
61 KiB
Python
import typing
|
|
import urllib.parse
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
from hydrus.core import HydrusData
|
|
from hydrus.core import HydrusExceptions
|
|
from hydrus.core import HydrusSerialisable
|
|
from hydrus.core import HydrusTime
|
|
|
|
from hydrus.client import ClientStrings
|
|
from hydrus.client.networking import ClientNetworkingFunctions
|
|
|
|
# Gallery index type identifiers.
GALLERY_INDEX_TYPE_PATH_COMPONENT = 0
GALLERY_INDEX_TYPE_PARAMETER = 1

# Referral URL policy identifiers. Human-readable descriptions for each
# one live in send_referral_url_string_lookup below.
SEND_REFERRAL_URL_ONLY_IF_PROVIDED = 0
SEND_REFERRAL_URL_NEVER = 1
SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED = 2
SEND_REFERRAL_URL_ONLY_CONVERTER = 3

# Every referral policy, in ascending id order.
SEND_REFERRAL_URL_TYPES = [
    SEND_REFERRAL_URL_ONLY_IF_PROVIDED,
    SEND_REFERRAL_URL_NEVER,
    SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED,
    SEND_REFERRAL_URL_ONLY_CONVERTER,
]

# Maps each referral policy id to its description string.
send_referral_url_string_lookup = {
    SEND_REFERRAL_URL_ONLY_IF_PROVIDED: 'send a referral url if available',
    SEND_REFERRAL_URL_NEVER: 'never send a referral url',
    SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED: 'use the converter if no referral is available',
    SEND_REFERRAL_URL_ONLY_CONVERTER: 'always use the converter referral url',
}
|
|
|
|
def ConvertURLClassesIntoAPIPairs( url_classes ):
    """Pair each API-using URL class with the URL class that matches its API URL.

    The input is copied and sorted most-complex-first. For every class whose
    UsesAPIURL() is true, its example URL is converted with GetAPIURL(); the
    first *other* class (in sorted order) whose Matches() accepts that API URL
    is paired with it.

    :param url_classes: iterable of URL class objects; it is not modified.
    :return: list of ( url_class, api_url_class ) tuples.
    """

    url_classes = list( url_classes )

    SortURLClassesListDescendingComplexity( url_classes )

    pairs = []

    for url_class in url_classes:

        if not url_class.UsesAPIURL():

            continue

        example_url = url_class.GetExampleURL()

        try:

            api_url = url_class.GetAPIURL( example_url )

        except Exception:

            # a class whose example URL cannot be converted is skipped on purpose.
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate
            continue

        for other_url_class in url_classes:

            if other_url_class == url_class:

                continue

            if other_url_class.Matches( api_url ):

                pairs.append( ( url_class, other_url_class ) )

                # only the first (most complex) match is wanted
                break

    return pairs
|
|
|
|
def SortURLClassesListDescendingComplexity(url_classes: typing.List["URLClass"]):
    """Sort the given list in place so the most complex URL classes come first.

    Ordering uses each item's GetSortingComplexityKey(), which the original
    comment describes as
    ( num_path_components, num_required_parameters, num_total_parameters, len_example_url ).
    """

    def complexity_of(candidate):
        return candidate.GetSortingComplexityKey()

    # reverse = True puts the largest (most complex) keys at the front
    url_classes.sort(reverse=True, key=complexity_of)
|
|
|
|
|
|
class URLClassParameterFixedName( HydrusSerialisable.SerialisableBase ):
    """A URL query parameter that is identified by a fixed name.

    Stores the parameter name, a StringMatch used to check values, an
    'ephemeral' flag, an optional default value, and a StringProcessor that
    can be applied to that default value when it is requested.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME
    SERIALISABLE_NAME = 'URL Class Parameter - Fixed Name'
    SERIALISABLE_VERSION = 2

    def __init__( self, name = None, value_string_match = None ):
        """Create the parameter.

        :param name: parameter name; defaults to 'name' when None.
        :param value_string_match: StringMatch for the value; defaults to a
            fixed match on 'value' when None.
        """

        if name is None:

            name = 'name'

        if value_string_match is None:

            value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'value', example_string = 'value' )

        HydrusSerialisable.SerialisableBase.__init__( self )

        self._name = name
        self._value_string_match = value_string_match

        self._is_ephemeral = False

        self._default_value = None
        self._default_value_string_processor = ClientStrings.StringProcessor()

    def __repr__( self ):

        return f'URL Class Parameter - Fixed Name: {self._name}: {self._value_string_match.ToString()}'

    def _GetSerialisableInfo( self ):
        """Return the version 2 serialisable tuple for this object."""

        serialisable_value_string_match = self._value_string_match.GetSerialisableTuple()
        serialisable_default_value_string_processor = self._default_value_string_processor.GetSerialisableTuple()

        return ( self._name, serialisable_value_string_match, self._is_ephemeral, self._default_value, serialisable_default_value_string_processor )

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore state from a tuple produced by _GetSerialisableInfo."""

        ( self._name, serialisable_value_string_match, self._is_ephemeral, self._default_value, serialisable_default_value_string_processor ) = serialisable_info

        self._value_string_match = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_value_string_match )
        self._default_value_string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_default_value_string_processor )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """Upgrade serialised info by one version.

        Version 1 held ( name, value string match, default value ); version 2
        adds the ephemeral flag (False) and a default value string processor.

        :return: ( new_version, new_serialisable_info ) for a handled version.
        """

        if version == 1:

            ( name, serialisable_value_string_match, default_value ) = old_serialisable_info

            is_ephemeral = False

            # this must be a StringProcessor, the type __init__ creates and
            # GetDefaultValue calls ProcessStrings on. a StringConverter here
            # would be restored into _default_value_string_processor on load
            default_value_string_processor = ClientStrings.StringProcessor()

            serialisable_default_value_string_processor = default_value_string_processor.GetSerialisableTuple()

            new_serialisable_info = ( name, serialisable_value_string_match, is_ephemeral, default_value, serialisable_default_value_string_processor )

            return ( 2, new_serialisable_info )

    def GetDefaultValue( self, with_processing = False ) -> typing.Optional[ str ]:
        """Return the default value, or None if there is none.

        :param with_processing: when True and a default exists, run it through
            the default value string processor and return the first result.
            If processing fails, the unprocessed default is returned instead.
        """

        if not with_processing or self._default_value is None:

            return self._default_value

        try:

            result = self._default_value_string_processor.ProcessStrings( [ self._default_value ] )

            return result[0]

        except Exception:

            # best effort: fall back to the raw default, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would
            return self._default_value

    def GetDefaultValueStringProcessor( self ) -> ClientStrings.StringProcessor:

        return self._default_value_string_processor

    def GetName( self ):

        return self._name

    def GetValueStringMatch( self ):

        return self._value_string_match

    def HasDefaultValue( self ):

        return self._default_value is not None

    def IsEphemeralToken( self ):

        return self._is_ephemeral

    def MustBeInOriginalURL( self ):
        """True when there is no default value and the parameter is not ephemeral."""

        return self._default_value is None and not self.IsEphemeralToken()

    def MatchesName( self, name ):

        return self._name == name

    def MatchesValue( self, value ):
        """Return the value string match's Matches() result for the value."""

        return self._value_string_match.Matches( value )

    def SetDefaultValue( self, default_value: typing.Optional[ str ] ):

        self._default_value = default_value

    def SetDefaultValueStringProcessor( self, default_value_string_processor: ClientStrings.StringProcessor ):

        self._default_value_string_processor = default_value_string_processor

    def SetIsEphemeral( self, value ):

        self._is_ephemeral = value

    def TestValue( self, value ):
        """Call the value string match's Test() on the value; returns nothing itself."""

        self._value_string_match.Test( value )
|
|
|
|
|
|
|
|
# Register URLClassParameterFixedName under its serialisable type id in
# HydrusSerialisable's type-to-class lookup (presumably so serialised tuples of
# this type can be turned back into objects -- confirm in HydrusSerialisable).
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME ] = URLClassParameterFixedName
|
|
|
|
class URLClass( HydrusSerialisable.SerialisableBaseNamed ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS
|
|
SERIALISABLE_NAME = 'URL Class'
|
|
SERIALISABLE_VERSION = 14
|
|
|
|
def __init__(
    self,
    name: str,
    url_class_key = None,
    url_type = None,
    preferred_scheme = 'https',
    netloc = 'hostname.com',
    path_components = None,
    parameters = None,
    has_single_value_parameters = False,
    single_value_parameters_string_match = None,
    header_overrides = None,
    api_lookup_converter = None,
    send_referral_url = SEND_REFERRAL_URL_ONLY_IF_PROVIDED,
    referral_url_converter = None,
    gallery_index_type = None,
    gallery_index_identifier = None,
    gallery_index_delta = 1,
    example_url = 'https://hostname.com/post/page.php?id=123456&s=view'
    ):
    """Build a URL class, filling every argument left as None with a default.

    The defaults describe 'https://hostname.com/post/page.php?id=123456&s=view':
    two fixed path components ('post', 'page.php'), a fixed 's=view' parameter
    and a numeric 'id' parameter. The boolean options are not constructor
    arguments; they are set to fixed initial values here.
    """

    converter_example_string = 'https://hostname.com/post/page.php?id=123456&s=view'

    if url_class_key is None:
        url_class_key = HydrusData.GenerateKey()

    if url_type is None:
        url_type = HC.URL_TYPE_POST

    if path_components is None:
        # each entry is ( string match, default ); the defaults have no default value
        path_components = [
            (ClientStrings.StringMatch(match_type=ClientStrings.STRING_MATCH_FIXED, match_value=fixed_text, example_string=fixed_text), None)
            for fixed_text in ('post', 'page.php')
        ]

    if parameters is None:
        view_parameter = URLClassParameterFixedName(
            name='s',
            value_string_match=ClientStrings.StringMatch(match_type=ClientStrings.STRING_MATCH_FIXED, match_value='view', example_string='view')
        )
        id_parameter = URLClassParameterFixedName(
            name='id',
            value_string_match=ClientStrings.StringMatch(match_type=ClientStrings.STRING_MATCH_FLEXIBLE, match_value=ClientStrings.NUMERIC, example_string='123456')
        )
        parameters = [view_parameter, id_parameter]

    if single_value_parameters_string_match is None:
        single_value_parameters_string_match = ClientStrings.StringMatch()

    if header_overrides is None:
        header_overrides = {}

    if api_lookup_converter is None:
        api_lookup_converter = ClientStrings.StringConverter(example_string=converter_example_string)

    if referral_url_converter is None:
        referral_url_converter = ClientStrings.StringConverter(example_string=converter_example_string)

    # callers may hand in plain lists, so wrap both in the serialisable list type
    path_components = HydrusSerialisable.SerialisableList(path_components)
    parameters = HydrusSerialisable.SerialisableList(parameters)

    HydrusSerialisable.SerialisableBaseNamed.__init__(self, name)

    # identity and location
    self._url_class_key = url_class_key
    self._url_type = url_type
    self._preferred_scheme = preferred_scheme
    self._netloc = netloc

    # boolean options, fixed initial values
    self._match_subdomains = False
    self._keep_matched_subdomains = False
    self._alphabetise_get_parameters = True
    self._no_more_path_components_than_this = False
    self._no_more_parameters_than_this = False
    self._keep_extra_parameters_for_server = True
    self._can_produce_multiple_files = False
    self._should_be_associated_with_files = True
    self._keep_fragment = False

    # matching rules and request details
    self._path_components = path_components
    self._parameters = parameters
    self._has_single_value_parameters = has_single_value_parameters
    self._single_value_parameters_string_match = single_value_parameters_string_match
    self._header_overrides = header_overrides
    self._api_lookup_converter = api_lookup_converter

    self._send_referral_url = send_referral_url
    self._referral_url_converter = referral_url_converter

    self._gallery_index_type = gallery_index_type
    self._gallery_index_identifier = gallery_index_identifier
    self._gallery_index_delta = gallery_index_delta

    self._example_url = example_url

    # extra parameters are not kept for the server when extra parameters are
    # disallowed or when the api lookup converter makes changes
    if self._no_more_parameters_than_this or self._api_lookup_converter.MakesChanges():
        self._keep_extra_parameters_for_server = False
|
|
|
|
|
|
|
|
def __eq__( self, other ):
|
|
|
|
if isinstance( other, URLClass ):
|
|
|
|
return self.__hash__() == other.__hash__()
|
|
|
|
|
|
return NotImplemented
|
|
|
|
|
|
def __hash__( self ):
|
|
|
|
return ( self._name, self._url_class_key ).__hash__()
|
|
|
|
|
|
def _ClipNetLoc( self, netloc ):
|
|
|
|
if self._keep_matched_subdomains:
|
|
|
|
# for domains like artistname.website.com, where removing the subdomain may break the url, we leave it alone
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
# for domains like mediaserver4.website.com, where multiple subdomains serve the same content as the larger site
|
|
|
|
if not ClientNetworkingFunctions.DomainEqualsAnotherForgivingWWW( netloc, self._netloc ):
|
|
|
|
netloc = self._netloc
|
|
|
|
|
|
|
|
return netloc
|
|
|
|
|
|
def _ClipAndFleshOutPath( self, path_components: typing.List[ str ], for_server: bool ):
|
|
|
|
# /post/show/1326143/akunim-anthro-armband-armwear-clothed-clothing-fem
|
|
|
|
do_clip = self.UsesAPIURL() or not for_server
|
|
flesh_out = len( path_components ) < len( self._path_components )
|
|
|
|
if do_clip or flesh_out:
|
|
|
|
clipped_path_components = []
|
|
|
|
for ( index, ( string_match, default ) ) in enumerate( self._path_components ):
|
|
|
|
if len( path_components ) > index: # the given path has the value
|
|
|
|
clipped_path_component = path_components[ index ]
|
|
|
|
elif default is not None:
|
|
|
|
clipped_path_component = default
|
|
|
|
else:
|
|
|
|
raise HydrusExceptions.URLClassException( 'Could not clip path--given url appeared to be too short!' )
|
|
|
|
|
|
clipped_path_components.append( clipped_path_component )
|
|
|
|
|
|
path_components = clipped_path_components
|
|
|
|
|
|
path = '/' + '/'.join( path_components )
|
|
|
|
# /post/show/1326143
|
|
|
|
return path
|
|
|
|
|
|
def _ClipAndFleshOutQuery( self, query_dict: typing.Dict[ str, str ], single_value_parameters: typing.List[ str ], param_order: typing.List[ str ], for_server: bool ):
    """Build the query text for a URL, adding defaults and dropping unwanted parameters.

    Each of this class's parameters is matched against the names in
    query_dict. A parameter that is absent but has a default is added (note
    that this writes into the caller's query_dict and appends to the caller's
    param_order). A parameter that is absent with no default is an error
    unless it is an ephemeral token.

    Names with no matching parameter are kept only when for_server is true
    and extra parameters are kept for the server. Matched ephemeral tokens are
    kept only when for_server is true.

    :param query_dict: parameter name -> value.
    :param single_value_parameters: single-value query parameters; dropped
        unless the class has single value parameters or extras are being
        kept for the server.
    :param param_order: parameter name order, ignored when parameters are alphabetised.
    :param for_server: whether the result is for sending to the server.
    :return: the result of ClientNetworkingFunctions.ConvertQueryDictToText.
    :raises HydrusExceptions.URLClassException: a required parameter is
        missing, or a missing parameter with a default has no fixed name.
    """

    query_dict_keys_to_parameters = {}

    remaining_query_dict_names = set( query_dict.keys() )

    # if we were feeling clever, we could sort these guys from most specific name to least, but w/e
    for parameter in self._parameters:

        match_found = False

        for name in remaining_query_dict_names:

            if parameter.MatchesName( name ):

                query_dict_keys_to_parameters[ name ] = parameter

                remaining_query_dict_names.discard( name )

                match_found = True

                # the set was just changed, so iteration must stop here
                break

        if match_found:

            continue

        if parameter.HasDefaultValue():

            if not isinstance( parameter, URLClassParameterFixedName ):

                raise HydrusExceptions.URLClassException( f'Could not flesh out query--cannot figure out a fixed name for {parameter}!' )

            name = parameter.GetName()

            query_dict_keys_to_parameters[ name ] = parameter

            query_dict[ name ] = parameter.GetDefaultValue( with_processing = True )

            param_order.append( name )

        elif not parameter.IsEphemeralToken():

            # report the missing parameter itself. the loop variable 'name' from
            # the search above is either unbound (empty query) or some unrelated
            # query key, so it must not be used here
            missing_name = parameter.GetName() if isinstance( parameter, URLClassParameterFixedName ) else parameter

            raise HydrusExceptions.URLClassException( f'Could not flesh out query--no default for {missing_name} defined!' )

    for name in remaining_query_dict_names:

        # names that no parameter claimed
        query_dict_keys_to_parameters[ name ] = None

    # ok, we now have our fully fleshed out query_dict. let's filter it

    filtered_query_dict = {}

    for ( name, possible_parameter ) in query_dict_keys_to_parameters.items():

        if possible_parameter is None:

            if not ( for_server and self._keep_extra_parameters_for_server ):

                # no matching param, discard it
                continue

        elif possible_parameter.IsEphemeralToken() and not for_server:

            continue

        filtered_query_dict[ name ] = query_dict[ name ]

    query_dict = filtered_query_dict

    if self._alphabetise_get_parameters:

        param_order = None

    we_want_single_value_params = self._has_single_value_parameters or ( for_server and self._keep_extra_parameters_for_server )

    if not we_want_single_value_params:

        single_value_parameters = []

    return ClientNetworkingFunctions.ConvertQueryDictToText( query_dict, single_value_parameters, param_order = param_order )
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_url_class_key = self._url_class_key.hex()
|
|
serialisable_path_components = [ ( string_match.GetSerialisableTuple(), default ) for ( string_match, default ) in self._path_components ]
|
|
serialisable_parameters = self._parameters.GetSerialisableTuple()
|
|
serialisable_single_value_parameters_string_match = self._single_value_parameters_string_match.GetSerialisableTuple()
|
|
serialisable_header_overrides = list( self._header_overrides.items() )
|
|
serialisable_api_lookup_converter = self._api_lookup_converter.GetSerialisableTuple()
|
|
serialisable_referral_url_converter = self._referral_url_converter.GetSerialisableTuple()
|
|
|
|
booleans = ( self._match_subdomains, self._keep_matched_subdomains, self._alphabetise_get_parameters, self._no_more_path_components_than_this, self._no_more_parameters_than_this, self._keep_extra_parameters_for_server, self._can_produce_multiple_files, self._should_be_associated_with_files, self._keep_fragment )
|
|
|
|
return (
|
|
serialisable_url_class_key,
|
|
self._url_type,
|
|
self._preferred_scheme,
|
|
self._netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
self._has_single_value_parameters,
|
|
serialisable_single_value_parameters_string_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
self._send_referral_url,
|
|
serialisable_referral_url_converter,
|
|
self._gallery_index_type,
|
|
self._gallery_index_identifier,
|
|
self._gallery_index_delta,
|
|
self._example_url
|
|
)
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo(self, serialisable_info):
    """Restore this URL class from a tuple produced by _GetSerialisableInfo.

    Plain values are copied straight onto the instance; the hex key, the
    header override pairs and the nested serialisable tuples are converted
    back into bytes, a dict and objects respectively.
    """

    (
        key_hex,
        url_type,
        preferred_scheme,
        netloc,
        flags,
        path_component_rows,
        parameters_tuple,
        has_single_value_parameters,
        single_value_match_tuple,
        header_override_pairs,
        api_converter_tuple,
        send_referral_url,
        referral_converter_tuple,
        gallery_index_type,
        gallery_index_identifier,
        gallery_index_delta,
        example_url,
    ) = serialisable_info

    (
        match_subdomains,
        keep_matched_subdomains,
        alphabetise_get_parameters,
        no_more_path_components_than_this,
        no_more_parameters_than_this,
        keep_extra_parameters_for_server,
        can_produce_multiple_files,
        should_be_associated_with_files,
        keep_fragment,
    ) = flags

    self._url_type = url_type
    self._preferred_scheme = preferred_scheme
    self._netloc = netloc
    self._has_single_value_parameters = has_single_value_parameters
    self._send_referral_url = send_referral_url
    self._gallery_index_type = gallery_index_type
    self._gallery_index_identifier = gallery_index_identifier
    self._gallery_index_delta = gallery_index_delta
    self._example_url = example_url

    self._match_subdomains = match_subdomains
    self._keep_matched_subdomains = keep_matched_subdomains
    self._alphabetise_get_parameters = alphabetise_get_parameters
    self._no_more_path_components_than_this = no_more_path_components_than_this
    self._no_more_parameters_than_this = no_more_parameters_than_this
    self._keep_extra_parameters_for_server = keep_extra_parameters_for_server
    self._can_produce_multiple_files = can_produce_multiple_files
    self._should_be_associated_with_files = should_be_associated_with_files
    self._keep_fragment = keep_fragment

    self._url_class_key = bytes.fromhex(key_hex)

    restored_path_components = []

    for (serialisable_string_match, default) in path_component_rows:
        restored_path_components.append((HydrusSerialisable.CreateFromSerialisableTuple(serialisable_string_match), default))

    self._path_components = restored_path_components

    self._parameters = HydrusSerialisable.CreateFromSerialisableTuple(parameters_tuple)
    self._single_value_parameters_string_match = HydrusSerialisable.CreateFromSerialisableTuple(single_value_match_tuple)
    self._header_overrides = dict(header_override_pairs)
    self._api_lookup_converter = HydrusSerialisable.CreateFromSerialisableTuple(api_converter_tuple)
    self._referral_url_converter = HydrusSerialisable.CreateFromSerialisableTuple(referral_converter_tuple)

    # same rule as the constructor: extras are not kept for the server when
    # extra parameters are disallowed or the api lookup converter makes changes
    if self._no_more_parameters_than_this or self._api_lookup_converter.MakesChanges():
        self._keep_extra_parameters_for_server = False
|
|
|
|
|
|
|
|
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
|
|
|
|
if version == 1:
|
|
|
|
( url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, example_url ) = old_serialisable_info
|
|
|
|
url_class_key = HydrusData.GenerateKey()
|
|
|
|
serialisable_url_class_key = url_class_key.hex()
|
|
|
|
api_lookup_converter = ClientStrings.StringConverter( example_string = example_url )
|
|
|
|
serialisable_api_lookup_converter = api_lookup_converter.GetSerialisableTuple()
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, example_url )
|
|
|
|
return ( 2, new_serialisable_info )
|
|
|
|
|
|
if version == 2:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, example_url ) = old_serialisable_info
|
|
|
|
if url_type in ( HC.URL_TYPE_FILE, HC.URL_TYPE_POST ):
|
|
|
|
should_be_associated_with_files = True
|
|
|
|
else:
|
|
|
|
should_be_associated_with_files = False
|
|
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, should_be_associated_with_files, example_url )
|
|
|
|
return ( 3, new_serialisable_info )
|
|
|
|
|
|
if version == 3:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, should_be_associated_with_files, example_url ) = old_serialisable_info
|
|
|
|
can_produce_multiple_files = False
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, example_url )
|
|
|
|
return ( 4, new_serialisable_info )
|
|
|
|
|
|
if version == 4:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, example_url ) = old_serialisable_info
|
|
|
|
gallery_index_type = None
|
|
gallery_index_identifier = None
|
|
gallery_index_delta = 1
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
|
|
|
|
return ( 5, new_serialisable_info )
|
|
|
|
|
|
if version == 5:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url ) = old_serialisable_info
|
|
|
|
path_components = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_path_components )
|
|
parameters = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_parameters )
|
|
|
|
path_components = [ ( value, None ) for value in path_components ]
|
|
parameters = { key : ( value, None ) for ( key, value ) in list(parameters.items()) }
|
|
|
|
serialisable_path_components = [ ( string_match.GetSerialisableTuple(), default ) for ( string_match, default ) in path_components ]
|
|
serialisable_parameters = [ ( key, ( string_match.GetSerialisableTuple(), default ) ) for ( key, ( string_match, default ) ) in list(parameters.items()) ]
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
|
|
|
|
return ( 6, new_serialisable_info )
|
|
|
|
|
|
if version == 6:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url ) = old_serialisable_info
|
|
|
|
send_referral_url = SEND_REFERRAL_URL_ONLY_IF_PROVIDED
|
|
referral_url_converter = ClientStrings.StringConverter( example_string = 'https://hostname.com/post/page.php?id=123456&s=view' )
|
|
|
|
serialisable_referrel_url_converter = referral_url_converter.GetSerialisableTuple()
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
|
|
|
|
return ( 7, new_serialisable_info )
|
|
|
|
|
|
if version == 7:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url ) = old_serialisable_info
|
|
|
|
alphabetise_get_parameters = True
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
|
|
|
|
return ( 8, new_serialisable_info )
|
|
|
|
|
|
if version == 8:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url ) = old_serialisable_info
|
|
|
|
keep_fragment = False
|
|
|
|
booleans = ( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, can_produce_multiple_files, should_be_associated_with_files, keep_fragment )
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, booleans, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
|
|
|
|
return ( 9, new_serialisable_info )
|
|
|
|
|
|
if version == 9:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, booleans, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url ) = old_serialisable_info
|
|
|
|
header_overrides = {}
|
|
|
|
serialisable_header_overrides = list( header_overrides.items() )
|
|
|
|
new_serialisable_info = ( serialisable_url_class_key, url_type, preferred_scheme, netloc, booleans, serialisable_path_components, serialisable_parameters, serialisable_header_overrides, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
|
|
|
|
return ( 10, new_serialisable_info )
|
|
|
|
|
|
if version == 10:
|
|
|
|
( serialisable_url_class_key, url_type, preferred_scheme, netloc, booleans, serialisable_path_components, serialisable_parameters, serialisable_header_overrides, serialisable_api_lookup_converter, send_referral_url, serialisable_referrel_url_converter, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url ) = old_serialisable_info
|
|
|
|
has_single_value_parameters = False
|
|
single_value_parameters_string_match = ClientStrings.StringMatch()
|
|
|
|
serialisable_single_value_parameters_match = single_value_parameters_string_match.GetSerialisableTuple()
|
|
|
|
new_serialisable_info = (
|
|
serialisable_url_class_key,
|
|
url_type,
|
|
preferred_scheme,
|
|
netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
has_single_value_parameters,
|
|
serialisable_single_value_parameters_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
send_referral_url,
|
|
serialisable_referrel_url_converter,
|
|
gallery_index_type,
|
|
gallery_index_identifier,
|
|
gallery_index_delta,
|
|
example_url
|
|
)
|
|
|
|
return ( 11, new_serialisable_info )
|
|
|
|
|
|
if version == 11:
|
|
|
|
(
|
|
serialisable_url_class_key,
|
|
url_type,
|
|
preferred_scheme,
|
|
netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
has_single_value_parameters,
|
|
serialisable_single_value_parameters_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
send_referral_url,
|
|
serialisable_referrel_url_converter,
|
|
gallery_index_type,
|
|
gallery_index_identifier,
|
|
gallery_index_delta,
|
|
example_url
|
|
) = old_serialisable_info
|
|
|
|
( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, can_produce_multiple_files, should_be_associated_with_files, keep_fragment ) = booleans
|
|
|
|
no_more_path_components_than_this = False
|
|
no_more_parameters_than_this = False
|
|
|
|
booleans = ( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, no_more_path_components_than_this, no_more_parameters_than_this, can_produce_multiple_files, should_be_associated_with_files, keep_fragment )
|
|
|
|
new_serialisable_info = (
|
|
serialisable_url_class_key,
|
|
url_type,
|
|
preferred_scheme,
|
|
netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
has_single_value_parameters,
|
|
serialisable_single_value_parameters_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
send_referral_url,
|
|
serialisable_referrel_url_converter,
|
|
gallery_index_type,
|
|
gallery_index_identifier,
|
|
gallery_index_delta,
|
|
example_url
|
|
)
|
|
|
|
return ( 12, new_serialisable_info )
|
|
|
|
|
|
if version == 12:
|
|
|
|
(
|
|
serialisable_url_class_key,
|
|
url_type,
|
|
preferred_scheme,
|
|
netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
has_single_value_parameters,
|
|
serialisable_single_value_parameters_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
send_referral_url,
|
|
serialisable_referrel_url_converter,
|
|
gallery_index_type,
|
|
gallery_index_identifier,
|
|
gallery_index_delta,
|
|
example_url
|
|
) = old_serialisable_info
|
|
|
|
def encode_fixed_string_match_param( s_m: ClientStrings.StringMatch ) -> ClientStrings.StringMatch:
|
|
|
|
( match_type, match_value, min_chars, max_chars, example_string ) = s_m.ToTuple()
|
|
|
|
if match_type == ClientStrings.STRING_MATCH_FIXED:
|
|
|
|
match_value = ClientNetworkingFunctions.ensure_param_component_is_encoded( match_value )
|
|
example_string = ClientNetworkingFunctions.ensure_param_component_is_encoded( example_string )
|
|
|
|
s_m = ClientStrings.StringMatch(
|
|
match_type = match_type,
|
|
match_value = match_value,
|
|
min_chars = min_chars,
|
|
max_chars = max_chars,
|
|
example_string = example_string
|
|
)
|
|
|
|
|
|
return s_m
|
|
|
|
|
|
def encode_fixed_string_match_path( s_m: ClientStrings.StringMatch ) -> ClientStrings.StringMatch:
|
|
|
|
( match_type, match_value, min_chars, max_chars, example_string ) = s_m.ToTuple()
|
|
|
|
if match_type == ClientStrings.STRING_MATCH_FIXED:
|
|
|
|
match_value = ClientNetworkingFunctions.ensure_path_component_is_encoded( match_value )
|
|
example_string = ClientNetworkingFunctions.ensure_path_component_is_encoded( example_string )
|
|
|
|
s_m = ClientStrings.StringMatch(
|
|
match_type = match_type,
|
|
match_value = match_value,
|
|
min_chars = min_chars,
|
|
max_chars = max_chars,
|
|
example_string = example_string
|
|
)
|
|
|
|
|
|
return s_m
|
|
|
|
|
|
new_parameters = HydrusSerialisable.SerialisableList()
|
|
|
|
for ( name, ( serialisable_value_string_match, default_value ) ) in serialisable_parameters:
|
|
|
|
# we are converting from post[id] to post%5Bid%5D
|
|
name = ClientNetworkingFunctions.ensure_param_component_is_encoded( name )
|
|
|
|
value_string_match = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_value_string_match )
|
|
|
|
value_string_match = encode_fixed_string_match_param( value_string_match )
|
|
|
|
parameter = URLClassParameterFixedName(
|
|
name = name,
|
|
value_string_match = value_string_match
|
|
)
|
|
|
|
if default_value is not None:
|
|
|
|
default_value = ClientNetworkingFunctions.ensure_param_component_is_encoded( default_value )
|
|
|
|
parameter.SetDefaultValue( default_value )
|
|
|
|
|
|
new_parameters.append( parameter )
|
|
|
|
|
|
serialisable_parameters = new_parameters.GetSerialisableTuple()
|
|
|
|
path_components = [ ( HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_match ), default ) for ( serialisable_string_match, default ) in serialisable_path_components ]
|
|
|
|
new_path_components = []
|
|
|
|
for ( string_match, default ) in path_components:
|
|
|
|
string_match = encode_fixed_string_match_path( string_match )
|
|
|
|
if default is not None:
|
|
|
|
default = ClientNetworkingFunctions.ensure_path_component_is_encoded( default )
|
|
|
|
|
|
new_path_components.append( ( string_match, default ) )
|
|
|
|
|
|
serialisable_path_components = [ ( string_match.GetSerialisableTuple(), default ) for ( string_match, default ) in new_path_components ]
|
|
|
|
new_serialisable_info = (
|
|
serialisable_url_class_key,
|
|
url_type,
|
|
preferred_scheme,
|
|
netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
has_single_value_parameters,
|
|
serialisable_single_value_parameters_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
send_referral_url,
|
|
serialisable_referrel_url_converter,
|
|
gallery_index_type,
|
|
gallery_index_identifier,
|
|
gallery_index_delta,
|
|
example_url
|
|
)
|
|
|
|
return ( 13, new_serialisable_info )
|
|
|
|
|
|
if version == 13:
|
|
|
|
(
|
|
serialisable_url_class_key,
|
|
url_type,
|
|
preferred_scheme,
|
|
netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
has_single_value_parameters,
|
|
serialisable_single_value_parameters_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
send_referral_url,
|
|
serialisable_referrel_url_converter,
|
|
gallery_index_type,
|
|
gallery_index_identifier,
|
|
gallery_index_delta,
|
|
example_url
|
|
) = old_serialisable_info
|
|
|
|
( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, no_more_path_components_than_this, no_more_parameters_than_this, can_produce_multiple_files, should_be_associated_with_files, keep_fragment ) = booleans
|
|
|
|
api_lookup_converter = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_api_lookup_converter )
|
|
|
|
keep_extra_parameters_for_server = True
|
|
|
|
if no_more_parameters_than_this or api_lookup_converter.MakesChanges() or url_type not in ( HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE ):
|
|
|
|
keep_extra_parameters_for_server = False
|
|
|
|
|
|
booleans = ( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, no_more_path_components_than_this, no_more_parameters_than_this, keep_extra_parameters_for_server, can_produce_multiple_files, should_be_associated_with_files, keep_fragment )
|
|
|
|
new_serialisable_info = (
|
|
serialisable_url_class_key,
|
|
url_type,
|
|
preferred_scheme,
|
|
netloc,
|
|
booleans,
|
|
serialisable_path_components,
|
|
serialisable_parameters,
|
|
has_single_value_parameters,
|
|
serialisable_single_value_parameters_match,
|
|
serialisable_header_overrides,
|
|
serialisable_api_lookup_converter,
|
|
send_referral_url,
|
|
serialisable_referrel_url_converter,
|
|
gallery_index_type,
|
|
gallery_index_identifier,
|
|
gallery_index_delta,
|
|
example_url
|
|
)
|
|
|
|
return ( 14, new_serialisable_info )
|
|
|
|
|
|
|
|
def AlphabetiseGetParameters( self ):
    """Return the stored 'alphabetise get parameters' flag."""
    
    alphabetise = self._alphabetise_get_parameters
    
    return alphabetise
|
|
|
|
|
|
def CanGenerateNextGalleryPage( self ):
    """Return True only when this is a gallery URL class that has a gallery index type set."""
    
    is_gallery = self._url_type == HC.URL_TYPE_GALLERY
    
    # the index type is only consulted for gallery url classes
    return bool( is_gallery and self._gallery_index_type is not None )
|
|
|
|
|
|
def CanReferToMultipleFiles( self ):
    """
    Report whether a URL of this class can refer to more than one file.
    
    Gallery and watchable URL classes always can; a post URL class can when it is flagged as able
    to produce multiple files.
    """
    
    if self._url_type in ( HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE ):
        
        return True
        
    
    return self._url_type == HC.URL_TYPE_POST and self._can_produce_multiple_files
|
|
|
|
|
|
def GetAPILookupConverter( self ):
    """Return the API lookup converter object held by this URL class."""
    
    converter = self._api_lookup_converter
    
    return converter
|
|
|
|
|
|
def GetAPIURL( self, url ):
    """
    Convert the given url into its API url.
    
    The url is first normalised with for_server = True, and the result is passed through the
    API lookup converter.
    """
    
    return self._api_lookup_converter.Convert( self.Normalise( url, for_server = True ) )
|
|
|
|
|
|
def GetClassKey( self ):
    """Return the key that identifies this URL class."""
    
    class_key = self._url_class_key
    
    return class_key
|
|
|
|
|
|
def GetDomain( self ):
    """Return the stored netloc of this URL class (same value as GetNetloc)."""
    
    domain = self._netloc
    
    return domain
|
|
|
|
|
|
def GetExampleURL( self ):
    """Return the example url stored on this URL class."""
    
    example_url = self._example_url
    
    return example_url
|
|
|
|
|
|
def GetGalleryIndexValues( self ):
    """Return the gallery paging settings as a ( type, identifier, delta ) tuple."""
    
    gallery_index_values = (
        self._gallery_index_type,
        self._gallery_index_identifier,
        self._gallery_index_delta
    )
    
    return gallery_index_values
|
|
|
|
|
|
def GetHeaderOverrides( self ):
    """Return the header overrides stored on this URL class."""
    
    header_overrides = self._header_overrides
    
    return header_overrides
|
|
|
|
|
|
def GetNetloc( self ):
    """Return the netloc stored on this URL class."""
    
    netloc = self._netloc
    
    return netloc
|
|
|
|
|
|
def GetNextGalleryPage( self, url ):
    """
    Build the url of the next gallery page that follows the given url.
    
    The url is normalised (for_server = True) and parsed. Depending on the gallery index type,
    either the path component at the gallery index identifier or the query parameter named by
    the identifier is read as an integer, increased by the gallery index delta, and written back.
    
    :param url: the current gallery page url.
    :return: the url text of the next gallery page.
    :raises HydrusExceptions.URLClassException: if the page index cannot be found or is not an integer.
    :raises NotImplementedError: if the gallery index type is not one of the known types.
    """
    
    url = self.Normalise( url, for_server = True )
    
    p = ClientNetworkingFunctions.ParseURL( url )
    
    scheme = p.scheme
    netloc = p.netloc
    path = p.path
    query = p.query
    params = ''
    fragment = p.fragment
    
    if self._gallery_index_type == GALLERY_INDEX_TYPE_PATH_COMPONENT:
        
        page_index_path_component_index = self._gallery_index_identifier
        
        path_components = ClientNetworkingFunctions.ConvertPathTextToList( path )
        
        try:
            
            page_index = path_components[ page_index_path_component_index ]
            
        except IndexError:
            
            raise HydrusExceptions.URLClassException( 'Could not generate next gallery page--not enough path components!' )
            
        
        try:
            
            page_index = int( page_index )
            
        except ( ValueError, TypeError ):
            
            # only conversion failures are expected here; anything else should propagate untouched
            raise HydrusExceptions.URLClassException( 'Could not generate next gallery page--index component was not an integer!' )
            
        
        path_components[ page_index_path_component_index ] = str( page_index + self._gallery_index_delta )
        
        path = '/' + '/'.join( path_components )
        
    elif self._gallery_index_type == GALLERY_INDEX_TYPE_PARAMETER:
        
        page_index_name = self._gallery_index_identifier
        
        ( query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( query )
        
        if page_index_name not in query_dict:
            
            raise HydrusExceptions.URLClassException( 'Could not generate next gallery page--did not find ' + str( self._gallery_index_identifier ) + ' in parameters!' )
            
        
        page_index = query_dict[ page_index_name ]
        
        try:
            
            page_index = int( page_index )
            
        except ( ValueError, TypeError ):
            
            # only conversion failures are expected here; anything else should propagate untouched
            raise HydrusExceptions.URLClassException( 'Could not generate next gallery page--index component was not an integer!' )
            
        
        query_dict[ page_index_name ] = page_index + self._gallery_index_delta
        
        if self._alphabetise_get_parameters:
            
            # discard the parsed parameter order when this class alphabetises its parameters
            param_order = None
            
        
        if not self._has_single_value_parameters:
            
            # this class does not use single-value parameters, so none are written back
            single_value_parameters = []
            
        
        query = ClientNetworkingFunctions.ConvertQueryDictToText( query_dict, single_value_parameters, param_order = param_order )
        
    else:
        
        raise NotImplementedError( 'Did not understand the next gallery page rules!' )
        
    
    next_gallery_url = urllib.parse.urlunparse( ( scheme, netloc, path, params, query, fragment ) )
    
    return next_gallery_url
|
|
|
|
|
|
def GetParameters( self ) -> typing.List[ URLClassParameterFixedName ]:
    """Return the list of parameter definitions stored on this URL class."""
    
    parameters = self._parameters
    
    return parameters
|
|
|
|
|
|
def GetPathComponents( self ):
    """Return the path component definitions stored on this URL class."""
    
    path_components = self._path_components
    
    return path_components
|
|
|
|
|
|
def GetPreferredScheme( self ):
    """Return the preferred scheme stored on this URL class."""
    
    preferred_scheme = self._preferred_scheme
    
    return preferred_scheme
|
|
|
|
|
|
def GetReferralURL( self, url, referral_url ):
    """
    Work out which referral url to use for a request to the given url.
    
    :param url: the url being requested; it is normalised (for_server = True) before conversion.
    :param referral_url: the referral url supplied by the caller, possibly None.
    :return: None when referral urls are never sent, otherwise either the supplied referral url
        or the one produced by the referral url converter, according to the send-referral setting.
    """
    
    mode = self._send_referral_url
    
    if mode == SEND_REFERRAL_URL_NEVER:
        
        return None
        
    
    if mode not in ( SEND_REFERRAL_URL_CONVERTER_IF_NONE_PROVIDED, SEND_REFERRAL_URL_ONLY_CONVERTER ):
        
        # 'only if provided', and any setting we do not recognise, passes the given value through
        return referral_url
        
    
    request_url = self.Normalise( url, for_server = True )
    
    try:
        
        converted_referral_url = self._referral_url_converter.Convert( request_url )
        
    except HydrusExceptions.StringConvertException:
        
        # conversion failed, so fall back to whatever we were given
        return referral_url
        
    
    if mode == SEND_REFERRAL_URL_ONLY_CONVERTER or referral_url is None:
        
        return converted_referral_url
        
    
    return referral_url
|
|
|
|
|
|
def GetReferralURLInfo( self ):
    """Return the ( send referral url setting, referral url converter ) pair."""
    
    referral_info = ( self._send_referral_url, self._referral_url_converter )
    
    return referral_info
|
|
|
|
|
|
def GetSafeSummary( self ):
    """Return a short text summary made of this URL class's name and the domain of its example url."""
    
    example_domain = ClientNetworkingFunctions.ConvertURLIntoDomain( self.GetExampleURL() )
    
    summary = 'URL Class "' + self._name + '" - ' + example_domain
    
    return summary
|
|
|
|
|
|
def GetSingleValueParameterData( self ):
    """Return the ( has single-value parameters flag, single-value parameters string match ) pair."""
    
    single_value_data = ( self._has_single_value_parameters, self._single_value_parameters_string_match )
    
    return single_value_data
|
|
|
|
|
|
def GetSortingComplexityKey( self ):
    """
    Return a tuple that can be used as a sort key for ordering URL classes by complexity.
    
    The tuple is ( required path components, total path components, required parameters,
    total parameters, length of the example url ). The example url is measured after
    normalisation (for_server = True) when that succeeds, and as stored otherwise.
    """
    
    # we sort url classes so that
    # site.com/post/123456
    # comes before
    # site.com/search?query=blah
    
    # I used to do gallery first, then post, then file, but it ultimately was unhelpful in some situations and better handled by strict component/parameter matching
    
    # a path component with no default is required
    num_required_path_components = sum( 1 for ( string_match, default ) in self._path_components if default is None )
    num_total_path_components = len( self._path_components )
    
    # likewise, a parameter with no default value is required
    num_required_parameters = sum( 1 for parameter in self._parameters if not parameter.HasDefaultValue() )
    num_total_parameters = len( self._parameters )
    
    try:
        
        len_example_url = len( self.Normalise( self._example_url, for_server = True ) )
        
    except Exception:
        
        # best effort: if the example url cannot be normalised, measure it as stored
        # Exception rather than a bare except, so KeyboardInterrupt and SystemExit still propagate
        len_example_url = len( self._example_url )
        
    
    return ( num_required_path_components, num_total_path_components, num_required_parameters, num_total_parameters, len_example_url )
|
|
|
|
|
|
def GetURLBooleans( self ):
    """
    Return six of this URL class's boolean settings as a tuple.
    
    The order is ( match subdomains, keep matched subdomains, alphabetise get parameters,
    can produce multiple files, should be associated with files, keep fragment ).
    """
    
    url_booleans = (
        self._match_subdomains,
        self._keep_matched_subdomains,
        self._alphabetise_get_parameters,
        self._can_produce_multiple_files,
        self._should_be_associated_with_files,
        self._keep_fragment
    )
    
    return url_booleans
|
|
|
|
|
|
def GetURLType( self ):
    """Return the url type stored on this URL class."""
    
    url_type = self._url_type
    
    return url_type
|
|
|
|
|
|
def IsGalleryURL( self ):
    """Report whether this URL class has the gallery url type."""
    
    is_gallery = self._url_type == HC.URL_TYPE_GALLERY
    
    return is_gallery
|
|
|
|
|
|
def IsParsable( self ):
    """Report whether this URL class's type is one of post, gallery or watchable."""
    
    parsable_url_types = ( HC.URL_TYPE_POST, HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE )
    
    return self._url_type in parsable_url_types
|
|
|
|
|
|
def IsPostURL( self ):
    """Report whether this URL class has the post url type."""
    
    is_post = self._url_type == HC.URL_TYPE_POST
    
    return is_post
|
|
|
|
|
|
def IsWatchableURL( self ):
    """Report whether this URL class has the watchable url type."""
    
    is_watchable = self._url_type == HC.URL_TYPE_WATCHABLE
    
    return is_watchable
|
|
|
|
|
|
def KeepExtraParametersForServer( self ):
    """Return the stored 'keep extra parameters for server' flag."""
    
    keep_extra = self._keep_extra_parameters_for_server
    
    return keep_extra
|
|
|
|
|
|
def Matches( self, url ):
    """
    Report whether the given url passes this URL class's Test.
    
    :return: True if Test raises nothing, False if it raises a URLClassException.
    """
    
    try:
        
        self.Test( url )
        
    except HydrusExceptions.URLClassException:
        
        return False
        
    
    return True
|
|
|
|
|
|
|
|
def MatchesSubdomains( self ):
    """Return the stored 'match subdomains' flag."""
    
    match_subdomains = self._match_subdomains
    
    return match_subdomains
|
|
|
|
|
|
def Normalise( self, url, for_server = False ):
    """
    Rebuild the given url according to this URL class's rules.
    
    The scheme is replaced with the preferred scheme, the netloc, path and query are passed
    through this class's clipping helpers, the url 'params' part is emptied, and the fragment
    is kept only if this class keeps fragments.
    
    :param url: the url text to normalise.
    :param for_server: passed through to the path and query helpers.
    :return: the normalised url text.
    """
    
    parsed_url = ClientNetworkingFunctions.ParseURL( url )
    
    # the fragment survives only when this class is set to keep it
    fragment = parsed_url.fragment if self._keep_fragment else ''
    
    path_components = ClientNetworkingFunctions.ConvertPathTextToList( parsed_url.path )
    ( query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( parsed_url.query )
    
    netloc = self._ClipNetLoc( parsed_url.netloc )
    path = self._ClipAndFleshOutPath( path_components, for_server )
    query = self._ClipAndFleshOutQuery( query_dict, single_value_parameters, param_order, for_server )
    
    # urlunparse wants ( scheme, netloc, path, params, query, fragment )
    url_parts = ( self._preferred_scheme, netloc, path, '', query, fragment )
    
    return urllib.parse.urlunparse( url_parts )
|
|
|
|
|
|
def NoMorePathComponentsThanThis( self ) -> bool:
    """Return the stored 'no more path components than this' flag."""
    
    no_more = self._no_more_path_components_than_this
    
    return no_more
|
|
|
|
|
|
def NoMoreParametersThanThis( self ) -> bool:
    """Return the stored 'no more parameters than this' flag."""
    
    no_more = self._no_more_parameters_than_this
    
    return no_more
|
|
|
|
|
|
def RefersToOneFile( self ):
    """
    Report whether a URL of this class refers to exactly one file.
    
    That is the case for a file URL class, and for a post URL class that is not flagged as able
    to produce multiple files.
    """
    
    if self._url_type == HC.URL_TYPE_FILE:
        
        return True
        
    
    return self._url_type == HC.URL_TYPE_POST and not self._can_produce_multiple_files
|
|
|
|
|
|
def RegenerateClassKey( self ):
    """Replace this URL class's key with a newly generated one."""
    
    new_key = HydrusData.GenerateKey()
    
    self._url_class_key = new_key
|
|
|
|
|
|
def SetAlphabetiseGetParameters( self, alphabetise_get_parameters: bool ):
    """Store the 'alphabetise get parameters' flag."""
    
    setattr( self, '_alphabetise_get_parameters', alphabetise_get_parameters )
|
|
|
|
|
|
def SetClassKey( self, match_key ):
    """Store the given key as this URL class's key."""
    
    setattr( self, '_url_class_key', match_key )
|
|
|
|
|
|
def SetExampleURL( self, example_url ):
    """Store the given example url on this URL class."""
    
    setattr( self, '_example_url', example_url )
|
|
|
|
|
|
def SetKeepExtraParametersForServer( self, value ):
    """Store the 'keep extra parameters for server' flag."""
    
    setattr( self, '_keep_extra_parameters_for_server', value )
|
|
|
|
|
|
def SetNoMorePathComponentsThanThis( self, no_more: bool ):
    """Store the 'no more path components than this' flag."""
    
    setattr( self, '_no_more_path_components_than_this', no_more )
|
|
|
|
|
|
def SetNoMoreParametersThanThis( self, no_more: bool ):
    """Store the 'no more parameters than this' flag."""
    
    setattr( self, '_no_more_parameters_than_this', no_more )
|
|
|
|
|
|
def SetSingleValueParameterData( self, has_single_value_parameters: bool, single_value_parameters_string_match: ClientStrings.StringMatch ):
    """Store the single-value parameter flag together with the string match used for those parameters."""
    
    ( self._has_single_value_parameters, self._single_value_parameters_string_match ) = ( has_single_value_parameters, single_value_parameters_string_match )
|
|
|
|
|
|
def SetURLBooleans(
    self,
    match_subdomains: bool,
    keep_matched_subdomains: bool,
    alphabetise_get_parameters: bool,
    can_produce_multiple_files: bool,
    should_be_associated_with_files: bool,
    keep_fragment: bool
):
    """Store six of this URL class's boolean settings, each argument going to the attribute of the same name."""
    
    new_values = {
        '_match_subdomains' : match_subdomains,
        '_keep_matched_subdomains' : keep_matched_subdomains,
        '_alphabetise_get_parameters' : alphabetise_get_parameters,
        '_can_produce_multiple_files' : can_produce_multiple_files,
        '_should_be_associated_with_files' : should_be_associated_with_files,
        '_keep_fragment' : keep_fragment
    }
    
    for ( attribute_name, new_value ) in new_values.items():
        
        setattr( self, attribute_name, new_value )
|
|
|
|
|
|
def ShouldAssociateWithFiles( self ):
    """Return the stored 'should be associated with files' flag."""
    
    should_associate = self._should_be_associated_with_files
    
    return should_associate
|
|
|
|
|
|
def Test( self, url ):
    """
    Check the given url against this URL class's rules, raising on the first mismatch.
    
    The checks run in this order: domain, path components, unexpected parameters (only when
    extra parameters are disallowed), the defined parameters, and single-value parameters.
    
    :param url: the url text to check.
    :raises HydrusExceptions.URLClassException: if any rule is not satisfied.
    """
    
    p = ClientNetworkingFunctions.ParseURL( url )
    
    if self._match_subdomains:
        
        # accept the exact domain or anything that ends with '.<domain>'
        if p.netloc != self._netloc and not p.netloc.endswith( '.' + self._netloc ):
            
            raise HydrusExceptions.URLClassException( p.netloc + ' (potentially excluding subdomains) did not match ' + self._netloc )
            
        
    else:
        
        if not ClientNetworkingFunctions.DomainEqualsAnotherForgivingWWW( p.netloc, self._netloc ):
            
            raise HydrusExceptions.URLClassException( p.netloc + ' did not match ' + self._netloc )
            
        
    
    path = p.path
    query = p.query
    
    path_components = ClientNetworkingFunctions.ConvertPathTextToList( path )
    ( query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( query )
    
    if self._no_more_path_components_than_this:
        
        if len( path_components ) > len( self._path_components ):
            
            raise HydrusExceptions.URLClassException( '"{}" has {} path components, but I will not allow more than my defined {}!'.format( path, len( path_components ), len( self._path_components ) ) )
            
        
    
    for ( index, ( string_match, default ) ) in enumerate( self._path_components ):
        
        if len( path_components ) > index:
            
            path_component = path_components[ index ]
            
            try:
                
                string_match.Test( path_component )
                
            except HydrusExceptions.StringMatchException as e:
                
                raise HydrusExceptions.URLClassException( str( e ) )
                
            
        elif default is None:
            
            # the url ran out of path components at a position that has no default to fall back on
            
            if index + 1 == len( self._path_components ):
                
                message = '"{}" has {} path components, but I was expecting {}!'.format( path, len( path_components ), len( self._path_components ) )
                
            else:
                
                message = '"{}" has {} path components, but I was expecting at least {} and maybe as many as {}!'.format( path, len( path_components ), index + 1, len( self._path_components ) )
                
            
            raise HydrusExceptions.URLClassException( message )
            
        
    
    if self._no_more_parameters_than_this:
        
        good_fixed_names = { parameter.GetName() for parameter in self._parameters if isinstance( parameter, URLClassParameterFixedName ) }
        
        for name in query_dict:
            
            if name not in good_fixed_names:
                
                raise HydrusExceptions.URLClassException( f'This has a "{name}" parameter, but I am set to not allow any unexpected parameters!' )
                
            
        
    
    for parameter in self._parameters:
        
        if isinstance( parameter, URLClassParameterFixedName ):
            
            name = parameter.GetName()
            
            if name not in query_dict:
                
                if parameter.MustBeInOriginalURL():
                    
                    raise HydrusExceptions.URLClassException( f'{name} not found in {p.query}' )
                    
                else:
                    
                    # this parameter is allowed to be absent, so there is no value to test
                    continue
                    
                
            
            value = query_dict[ name ]
            
            try:
                
                parameter.TestValue( value )
                
            except HydrusExceptions.StringMatchException as e:
                
                raise HydrusExceptions.URLClassException( f'Problem with {name}: ' + str( e ) )
                
            
        
    
    if len( single_value_parameters ) > 0 and not self._has_single_value_parameters and self._no_more_parameters_than_this:
        
        raise HydrusExceptions.URLClassException( '"{}" has unexpected single-value parameters, but I am set to not allow any unexpected parameters!'.format( query ) )
        
    
    if self._has_single_value_parameters:
        
        if len( single_value_parameters ) == 0:
            
            raise HydrusExceptions.URLClassException( 'Was expecting single-value parameter(s), but this URL did not seem to have any.' )
            
        
        for single_value_parameter in single_value_parameters:
            
            try:
                
                self._single_value_parameters_string_match.Test( single_value_parameter )
                
            except HydrusExceptions.StringMatchException as e:
                
                raise HydrusExceptions.URLClassException( str( e ) )
|
|
|
|
|
|
|
|
|
|
|
|
def UsesAPIURL( self ):
    """Report whether the API lookup converter makes any changes, as told by its MakesChanges()."""
    
    converter = self._api_lookup_converter
    
    return converter.MakesChanges()
|
|
|
|
|
|
# register URLClass in the serialisable type -> object type lookup, under SERIALISABLE_TYPE_URL_CLASS
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS ] = URLClass
|