hydrus/include/ClientNetworkingDomain.py


import ClientConstants as CC
import ClientNetworkingContexts
import ClientParsing
import ClientThreading
import collections
import HydrusConstants as HC
import HydrusGlobals as HG
import HydrusData
import HydrusExceptions
import HydrusNetworking
import HydrusSerialisable
import os
import re
import threading
import time
import urllib
import urlparse

def AlphabetiseQueryText( query_text ):
    return ConvertQueryDictToText( ConvertQueryTextToDict( query_text ) )

def ConvertDomainIntoAllApplicableDomains( domain ):
    # an ip address, possibly with a port
    if re.search( r'^[\d\.:]+$', domain ) is not None:
        return [ domain ]
    
    if domain == 'localhost':
        return [ domain ]
    
    domains = []
    
    while domain.count( '.' ) > 0:
        # let's discard www.blah.com and www2.blah.com so we don't end up tracking them separately to blah.com--there's not much point!
        startswith_www = domain.count( '.' ) > 1 and domain.startswith( 'www' )
        
        if not startswith_www:
            domains.append( domain )
        
        # strip off the leftmost subdomain: maps.google.com -> google.com
        domain = '.'.join( domain.split( '.' )[1:] )
    
    return domains
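
# A quick illustration of the function above (a sketch; results follow from the logic here):
#
# ConvertDomainIntoAllApplicableDomains( 'maps.google.com' ) -> [ 'maps.google.com', 'google.com' ]
# ConvertDomainIntoAllApplicableDomains( 'www.google.com' ) -> [ 'google.com' ]
# ConvertDomainIntoAllApplicableDomains( '127.0.0.1:45869' ) -> [ '127.0.0.1:45869' ]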

def ConvertDomainIntoSecondLevelDomain( domain ):
    domains = ConvertDomainIntoAllApplicableDomains( domain )
    
    if len( domains ) == 0:
        raise HydrusExceptions.URLMatchException( 'That url or domain did not seem to be valid!' )
    
    return domains[-1]

def ConvertHTTPSToHTTP( url ):
    if url.startswith( 'http://' ):
        return url
    elif url.startswith( 'https://' ):
        return 'http://' + url[8:]
    else:
        raise Exception( 'Given a url that did not have a scheme!' )

def ConvertHTTPToHTTPS( url ):
    if url.startswith( 'https://' ):
        return url
    elif url.startswith( 'http://' ):
        return 'https://' + url[7:]
    else:
        raise Exception( 'Given a url that did not have a scheme!' )
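
# e.g. (illustrative):
#
# ConvertHTTPToHTTPS( 'http://example.com/post/123' ) -> 'https://example.com/post/123'
# ConvertHTTPSToHTTP( 'https://example.com/post/123' ) -> 'http://example.com/post/123'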

def ConvertQueryDictToText( query_dict ):
    # we now do everything with requests, which does all the unicode -> ascii -> %20 business naturally, phew
    # so let's just stick with unicode, which we still want to call explicitly to coerce integers and so on that'll slip in here and there
    param_pairs = list( query_dict.items() )
    param_pairs.sort()
    
    query_text = u'&'.join( ( HydrusData.ToUnicode( key ) + u'=' + HydrusData.ToUnicode( value ) for ( key, value ) in param_pairs ) )
    
    return query_text

def ConvertQueryTextToDict( query_text ):
    # we generally do not want quote characters, %20 stuff, in our urls. we would prefer regular ascii and even unicode
    # first we will decode all unicode, which allows urllib to work
    query_text = HydrusData.ToByteString( query_text )
    
    query_dict = {}
    
    pairs = query_text.split( '&' )
    
    for pair in pairs:
        result = pair.split( '=', 1 )
        
        # for the moment, ignore tracker bugs and so on that have only a key and no value
        if len( result ) == 2:
            # so, let's replace all keys and values with unquoted versions
            # -but-
            # we only replace if it is a completely reversible operation!
            # odd situations like '6+girls+skirt', which comes here encoded as '6%2Bgirls+skirt', shouldn't turn into '6+girls+skirt'
            # so if there is a mix of encoded and non-encoded, we won't touch it here m8
            # we convert to unicode afterwards, so %E5%B0%BB%E7%A5%9E%E6%A7%98 -> \xe5\xb0\xbb\xe7\xa5\x9e\xe6\xa7\x98 -> \u5c3b\u795e\u69d8
            
            ( key, value ) = result
            
            try:
                unquoted_key = urllib.unquote( key )
                requoted_key = urllib.quote( unquoted_key )
                
                if key == requoted_key:
                    key = HydrusData.ToUnicode( unquoted_key )
                
            except:
                pass
            
            try:
                unquoted_value = urllib.unquote( value )
                requoted_value = urllib.quote( unquoted_value )
                
                if value == requoted_value:
                    value = HydrusData.ToUnicode( unquoted_value )
                
            except:
                pass
            
            query_dict[ key ] = value
    
    return query_dict
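
# Illustrative round trips for the two functions above (a sketch):
#
# ConvertQueryTextToDict( 'tags=blue%20eyes&page=3' ) -> { u'tags' : u'blue eyes', u'page' : u'3' }
# AlphabetiseQueryText( 'tags=skirt&page=3' ) -> u'page=3&tags=skirt'
# a value like '6%2Bgirls+skirt' mixes encoded and unencoded characters, so it is left exactly as it came in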

def ConvertURLMatchesIntoAPIPairs( url_matches ):
    url_matches = list( url_matches )
    
    NetworkDomainManager.STATICSortURLMatchesDescendingComplexity( url_matches )
    
    pairs = []
    
    for url_match in url_matches:
        if not url_match.UsesAPIURL():
            continue
        
        api_url = url_match.GetAPIURL( url_match.GetExampleURL() )
        
        for other_url_match in url_matches:
            if other_url_match == url_match:
                continue
            
            if other_url_match.Matches( api_url ):
                pairs.append( ( url_match, other_url_match ) )
                
                break
    
    return pairs

def ConvertURLIntoDomain( url ):
    parser_result = urlparse.urlparse( url )
    
    if parser_result.scheme == '':
        raise HydrusExceptions.URLMatchException( 'URL "' + url + '" was not recognised--did you forget the http:// or https://?' )
    
    if parser_result.netloc == '':
        raise HydrusExceptions.URLMatchException( 'URL "' + url + '" was not recognised--is it missing a domain?' )
    
    domain = HydrusData.ToByteString( parser_result.netloc )
    
    return domain

def ConvertURLIntoSecondLevelDomain( url ):
    domain = ConvertURLIntoDomain( url )
    
    return ConvertDomainIntoSecondLevelDomain( domain )

def DomainEqualsAnotherForgivingWWW( test_domain, wwwable_domain ):
    # the domain is either the same or starts with www. or www2. or similar
    rule = r'^(www[^\.]*\.)?' + re.escape( wwwable_domain ) + '$'
    
    return re.search( rule, test_domain ) is not None
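
# e.g. (illustrative):
#
# DomainEqualsAnotherForgivingWWW( 'www2.example.com', 'example.com' ) -> True
# DomainEqualsAnotherForgivingWWW( 'sub.example.com', 'example.com' ) -> False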

def GetCookie( cookies, search_domain, name ):
    existing_domains = cookies.list_domains()
    
    for existing_domain in existing_domains:
        # blah.com is viewable by blah.com
        matches_exactly = existing_domain == search_domain
        
        # .blah.com is viewable by blah.com
        matches_dot = existing_domain == '.' + search_domain
        
        # .blah.com applies to subdomain.blah.com, blah.com does not
        valid_subdomain = existing_domain.startswith( '.' ) and search_domain.endswith( existing_domain )
        
        if matches_exactly or matches_dot or valid_subdomain:
            cookie_dict = cookies.get_dict( existing_domain )
            
            if name in cookie_dict:
                return cookie_dict[ name ]
    
    raise HydrusExceptions.DataMissing( 'Cookie ' + name + ' not found for domain ' + search_domain + '!' )
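
# e.g. (illustrative): a cookie stored under '.example.com' is found when searching
# 'example.com' or 'subdomain.example.com', while one stored under 'example.com'
# is only found when searching 'example.com' itself.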

def GetSearchURLs( url ):
    search_urls = set()
    
    search_urls.add( url )
    
    normalised_url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url )
    
    search_urls.add( normalised_url )
    
    # generate the http/https counterparts
    for url in list( search_urls ):
        if url.startswith( 'http://' ):
            search_urls.add( ConvertHTTPToHTTPS( url ) )
        elif url.startswith( 'https://' ):
            search_urls.add( ConvertHTTPSToHTTP( url ) )
    
    # generate the www/non-www counterparts
    for url in list( search_urls ):
        p = urlparse.urlparse( url )
        
        scheme = p.scheme
        netloc = p.netloc
        path = p.path
        params = ''
        query = p.query
        fragment = ''
        
        if netloc.startswith( 'www' ):
            try:
                netloc = ConvertDomainIntoSecondLevelDomain( netloc )
            except HydrusExceptions.URLMatchException:
                continue
        else:
            netloc = 'www.' + netloc
        
        r = urlparse.ParseResult( scheme, netloc, path, params, query, fragment )
        
        search_urls.add( r.geturl() )
    
    return search_urls
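
# A sketch of the effect (illustrative; the exact set depends on the installed url classes):
#
# GetSearchURLs( 'https://www.example.com/post/123' ) would typically include the
# https/http and www/non-www variants of the normalised url, so the same file can
# be matched regardless of scheme or www prefix.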

VALID_DENIED = 0
VALID_APPROVED = 1
VALID_UNKNOWN = 2

valid_str_lookup = {}

valid_str_lookup[ VALID_DENIED ] = 'denied'
valid_str_lookup[ VALID_APPROVED ] = 'approved'
valid_str_lookup[ VALID_UNKNOWN ] = 'unknown'

class NetworkDomainManager( HydrusSerialisable.SerialisableBase ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_NETWORK_DOMAIN_MANAGER
    SERIALISABLE_NAME = 'Domain Manager'
    SERIALISABLE_VERSION = 6
    
    def __init__( self ):
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self.engine = None
        
        self._gugs = HydrusSerialisable.SerialisableList()
        self._url_matches = HydrusSerialisable.SerialisableList()
        self._parsers = HydrusSerialisable.SerialisableList()
        self._network_contexts_to_custom_header_dicts = collections.defaultdict( dict )
        
        self._parser_namespaces = []
        
        self._gug_keys_to_display = set()
        self._url_match_keys_to_display = set()
        self._url_match_keys_to_parser_keys = HydrusSerialisable.SerialisableBytesDictionary()
        
        self._domains_to_url_matches = collections.defaultdict( list )
        
        import ClientImportOptions
        
        self._file_post_default_tag_import_options = ClientImportOptions.TagImportOptions()
        self._watchable_default_tag_import_options = ClientImportOptions.TagImportOptions()
        self._url_match_keys_to_default_tag_import_options = {}
        
        self._gug_keys_to_gugs = {}
        self._gug_names_to_gugs = {}
        
        self._parser_keys_to_parsers = {}
        
        self._dirty = False
        
        self._lock = threading.Lock()
        
        self._RecalcCache()
    
    def _GetDefaultTagImportOptionsForURL( self, url ):
        url_match = self._GetURLMatch( url )
        
        if url_match is None or url_match.GetURLType() not in ( HC.URL_TYPE_POST, HC.URL_TYPE_WATCHABLE ):
            return self._file_post_default_tag_import_options
        
        try:
            ( url_match, url ) = self._GetNormalisedAPIURLMatchAndURL( url )
        except HydrusExceptions.URLMatchException:
            return self._file_post_default_tag_import_options
        
        url_match_key = url_match.GetMatchKey()
        
        if url_match_key in self._url_match_keys_to_default_tag_import_options:
            return self._url_match_keys_to_default_tag_import_options[ url_match_key ]
        else:
            url_type = url_match.GetURLType()
            
            if url_type == HC.URL_TYPE_POST:
                return self._file_post_default_tag_import_options
            elif url_type == HC.URL_TYPE_WATCHABLE:
                return self._watchable_default_tag_import_options
            else:
                raise HydrusExceptions.URLMatchException( 'Could not find tag import options for that kind of URL Class!' )
    
    def _GetGUG( self, gug_key_and_name ):
        ( gug_key, gug_name ) = gug_key_and_name
        
        if gug_key in self._gug_keys_to_gugs:
            return self._gug_keys_to_gugs[ gug_key ]
        elif gug_name in self._gug_names_to_gugs:
            return self._gug_names_to_gugs[ gug_name ]
        else:
            return None
    
    def _GetNormalisedAPIURLMatchAndURL( self, url ):
        url_match = self._GetURLMatch( url )
        
        if url_match is None:
            raise HydrusExceptions.URLMatchException( 'Could not find a URL Class for ' + url + '!' )
        
        seen_url_matches = set()
        
        seen_url_matches.add( url_match )
        
        api_url_match = url_match
        api_url = url
        
        while api_url_match.UsesAPIURL():
            api_url = api_url_match.GetAPIURL( api_url )
            api_url_match = self._GetURLMatch( api_url )
            
            if api_url_match is None:
                raise HydrusExceptions.URLMatchException( 'Could not find an API URL Class for ' + api_url + ' URL, which originally came from ' + url + '!' )
            
            if api_url_match in seen_url_matches:
                loop_size = len( seen_url_matches )
                
                if loop_size == 1:
                    message = 'Could not find an API URL Class for ' + url + ' as the url class API-linked to itself!'
                elif loop_size == 2:
                    message = 'Could not find an API URL Class for ' + url + ' as the url class and its API url class API-linked to each other!'
                else:
                    message = 'Could not find an API URL Class for ' + url + ' as it and its API url classes linked in a loop of size ' + HydrusData.ToHumanInt( loop_size ) + '!'
                
                raise HydrusExceptions.URLMatchException( message )
            
            seen_url_matches.add( api_url_match )
        
        api_url = api_url_match.Normalise( api_url )
        
        return ( api_url_match, api_url )
    
    def _GetURLToFetchAndParser( self, url ):
        try:
            ( parser_url_match, parser_url ) = self._GetNormalisedAPIURLMatchAndURL( url )
        except HydrusExceptions.URLMatchException as e:
            raise HydrusExceptions.URLMatchException( 'Could not find a parser for ' + url + '!' + os.linesep * 2 + HydrusData.ToUnicode( e ) )
        
        url_match_key = parser_url_match.GetMatchKey()
        
        if url_match_key in self._url_match_keys_to_parser_keys:
            parser_key = self._url_match_keys_to_parser_keys[ url_match_key ]
            
            if parser_key is not None and parser_key in self._parser_keys_to_parsers:
                return ( parser_url, self._parser_keys_to_parsers[ parser_key ] )
        
        raise HydrusExceptions.URLMatchException( 'Could not find a parser for ' + parser_url_match.GetName() + ' URL Class!' )
    
    def _GetSerialisableInfo( self ):
        serialisable_gugs = self._gugs.GetSerialisableTuple()
        serialisable_gug_keys_to_display = [ gug_key.encode( 'hex' ) for gug_key in self._gug_keys_to_display ]
        
        serialisable_url_matches = self._url_matches.GetSerialisableTuple()
        serialisable_url_match_keys_to_display = [ url_match_key.encode( 'hex' ) for url_match_key in self._url_match_keys_to_display ]
        serialisable_url_match_keys_to_parser_keys = self._url_match_keys_to_parser_keys.GetSerialisableTuple()
        
        serialisable_file_post_default_tag_import_options = self._file_post_default_tag_import_options.GetSerialisableTuple()
        serialisable_watchable_default_tag_import_options = self._watchable_default_tag_import_options.GetSerialisableTuple()
        serialisable_url_match_keys_to_default_tag_import_options = [ ( url_match_key.encode( 'hex' ), tag_import_options.GetSerialisableTuple() ) for ( url_match_key, tag_import_options ) in self._url_match_keys_to_default_tag_import_options.items() ]
        
        serialisable_default_tag_import_options_tuple = ( serialisable_file_post_default_tag_import_options, serialisable_watchable_default_tag_import_options, serialisable_url_match_keys_to_default_tag_import_options )
        
        serialisable_parsers = self._parsers.GetSerialisableTuple()
        serialisable_network_contexts_to_custom_header_dicts = [ ( network_context.GetSerialisableTuple(), custom_header_dict.items() ) for ( network_context, custom_header_dict ) in self._network_contexts_to_custom_header_dicts.items() ]
        
        return ( serialisable_gugs, serialisable_gug_keys_to_display, serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_default_tag_import_options_tuple, serialisable_parsers, serialisable_network_contexts_to_custom_header_dicts )
    
    def _GetURLMatch( self, url ):
        domain = ConvertDomainIntoSecondLevelDomain( ConvertURLIntoDomain( url ) )
        
        if domain in self._domains_to_url_matches:
            url_matches = self._domains_to_url_matches[ domain ]
            
            # these are sorted descending complexity, so the first that matches is the most specific
            for url_match in url_matches:
                try:
                    url_match.Test( url )
                    
                    return url_match
                except HydrusExceptions.URLMatchException:
                    continue
        
        return None
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        ( serialisable_gugs, serialisable_gug_keys_to_display, serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_default_tag_import_options_tuple, serialisable_parsers, serialisable_network_contexts_to_custom_header_dicts ) = serialisable_info
        
        self._gugs = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gugs )
        self._gug_keys_to_display = { serialisable_gug_key.decode( 'hex' ) for serialisable_gug_key in serialisable_gug_keys_to_display }
        
        self._url_matches = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_matches )
        self._url_match_keys_to_display = { serialisable_url_match_key.decode( 'hex' ) for serialisable_url_match_key in serialisable_url_match_keys_to_display }
        self._url_match_keys_to_parser_keys = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_match_keys_to_parser_keys )
        
        ( serialisable_file_post_default_tag_import_options, serialisable_watchable_default_tag_import_options, serialisable_url_match_keys_to_default_tag_import_options ) = serialisable_default_tag_import_options_tuple
        
        self._file_post_default_tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_post_default_tag_import_options )
        self._watchable_default_tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_watchable_default_tag_import_options )
        self._url_match_keys_to_default_tag_import_options = { serialisable_url_match_key.decode( 'hex' ) : HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options ) for ( serialisable_url_match_key, serialisable_tag_import_options ) in serialisable_url_match_keys_to_default_tag_import_options }
        
        self._parsers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_parsers )
        
        self._network_contexts_to_custom_header_dicts = collections.defaultdict( dict )
        
        for ( serialisable_network_context, custom_header_dict_items ) in serialisable_network_contexts_to_custom_header_dicts:
            network_context = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_network_context )
            custom_header_dict = dict( custom_header_dict_items )
            
            self._network_contexts_to_custom_header_dicts[ network_context ] = custom_header_dict
    
    def _RecalcCache( self ):
        self._domains_to_url_matches = collections.defaultdict( list )
        
        for url_match in self._url_matches:
            domain = url_match.GetDomain()
            
            self._domains_to_url_matches[ domain ].append( url_match )
        
        for url_matches in self._domains_to_url_matches.values():
            NetworkDomainManager.STATICSortURLMatchesDescendingComplexity( url_matches )
        
        self._gug_keys_to_gugs = { gug.GetGUGKey() : gug for gug in self._gugs }
        self._gug_names_to_gugs = { gug.GetName() : gug for gug in self._gugs }
        
        self._parser_keys_to_parsers = { parser.GetParserKey() : parser for parser in self._parsers }
        
        namespaces = set()
        
        for parser in self._parsers:
            namespaces.update( parser.GetNamespaces() )
        
        self._parser_namespaces = list( namespaces )
        self._parser_namespaces.sort()
    
    def _SetDirty( self ):
        self._dirty = True
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        if version == 1:
            ( serialisable_url_matches, serialisable_network_contexts_to_custom_header_dicts ) = old_serialisable_info
            
            url_matches = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_matches )
            
            url_match_names_to_display = {}
            url_match_names_to_page_parser_keys = HydrusSerialisable.SerialisableBytesDictionary()
            url_match_names_to_gallery_parser_keys = HydrusSerialisable.SerialisableBytesDictionary()
            
            for url_match in url_matches:
                name = url_match.GetName()
                
                if url_match.IsPostURL():
                    url_match_names_to_display[ name ] = True
                    
                    url_match_names_to_page_parser_keys[ name ] = None
                
                if url_match.IsGalleryURL() or url_match.IsWatchableURL():
                    url_match_names_to_gallery_parser_keys[ name ] = None
            
            serialisable_url_match_names_to_display = url_match_names_to_display.items()
            serialisable_url_match_names_to_page_parser_keys = url_match_names_to_page_parser_keys.GetSerialisableTuple()
            serialisable_url_match_names_to_gallery_parser_keys = url_match_names_to_gallery_parser_keys.GetSerialisableTuple()
            
            new_serialisable_info = ( serialisable_url_matches, serialisable_url_match_names_to_display, serialisable_url_match_names_to_page_parser_keys, serialisable_url_match_names_to_gallery_parser_keys, serialisable_network_contexts_to_custom_header_dicts )
            
            return ( 2, new_serialisable_info )
        
        if version == 2:
            ( serialisable_url_matches, serialisable_url_match_names_to_display, serialisable_url_match_names_to_page_parser_keys, serialisable_url_match_names_to_gallery_parser_keys, serialisable_network_contexts_to_custom_header_dicts ) = old_serialisable_info
            
            parsers = HydrusSerialisable.SerialisableList()
            
            serialisable_parsing_parsers = parsers.GetSerialisableTuple()
            
            url_match_names_to_display = dict( serialisable_url_match_names_to_display )
            
            url_match_keys_to_display = []
            
            url_match_names_to_gallery_parser_keys = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_match_names_to_gallery_parser_keys )
            url_match_names_to_page_parser_keys = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_match_names_to_page_parser_keys )
            
            url_match_keys_to_parser_keys = HydrusSerialisable.SerialisableBytesDictionary()
            
            url_matches = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_matches )
            
            for url_match in url_matches:
                url_match_key = url_match.GetMatchKey()
                name = url_match.GetName()
                
                if name in url_match_names_to_display and url_match_names_to_display[ name ]:
                    url_match_keys_to_display.append( url_match_key )
            
            serialisable_url_matches = url_matches.GetSerialisableTuple() # added random key this week, so save these changes back again!
            
            serialisable_url_match_keys_to_display = [ url_match_key.encode( 'hex' ) for url_match_key in url_match_keys_to_display ]
            serialisable_url_match_keys_to_parser_keys = url_match_keys_to_parser_keys.GetSerialisableTuple()
            
            new_serialisable_info = ( serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_parsing_parsers, serialisable_network_contexts_to_custom_header_dicts )
            
            return ( 3, new_serialisable_info )
        
        if version == 3:
            ( serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_parsing_parsers, serialisable_network_contexts_to_custom_header_dicts ) = old_serialisable_info
            
            import ClientImportOptions
            
            self._file_post_default_tag_import_options = ClientImportOptions.TagImportOptions()
            self._watchable_default_tag_import_options = ClientImportOptions.TagImportOptions()
            self._url_match_keys_to_default_tag_import_options = {}
            
            serialisable_file_post_default_tag_import_options = self._file_post_default_tag_import_options.GetSerialisableTuple()
            serialisable_watchable_default_tag_import_options = self._watchable_default_tag_import_options.GetSerialisableTuple()
            serialisable_url_match_keys_to_default_tag_import_options = [ ( url_match_key.encode( 'hex' ), tag_import_options.GetSerialisableTuple() ) for ( url_match_key, tag_import_options ) in self._url_match_keys_to_default_tag_import_options.items() ]
            
            serialisable_default_tag_import_options_tuple = ( serialisable_file_post_default_tag_import_options, serialisable_watchable_default_tag_import_options, serialisable_url_match_keys_to_default_tag_import_options )
            
            new_serialisable_info = ( serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_default_tag_import_options_tuple, serialisable_parsing_parsers, serialisable_network_contexts_to_custom_header_dicts )
            
            return ( 4, new_serialisable_info )
        
        if version == 4:
            ( serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_default_tag_import_options_tuple, serialisable_parsing_parsers, serialisable_network_contexts_to_custom_header_dicts ) = old_serialisable_info
            
            gugs = HydrusSerialisable.SerialisableList()
            
            serialisable_gugs = gugs.GetSerialisableTuple()
            
            new_serialisable_info = ( serialisable_gugs, serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_default_tag_import_options_tuple, serialisable_parsing_parsers, serialisable_network_contexts_to_custom_header_dicts )
            
            return ( 5, new_serialisable_info )
        
        if version == 5:
            ( serialisable_gugs, serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_default_tag_import_options_tuple, serialisable_parsing_parsers, serialisable_network_contexts_to_custom_header_dicts ) = old_serialisable_info
            
            gugs = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gugs )
            
            gug_keys_to_display = [ gug.GetGUGKey() for gug in gugs if 'ugoira' not in gug.GetName() ]
            
            serialisable_gug_keys_to_display = [ gug_key.encode( 'hex' ) for gug_key in gug_keys_to_display ]
            
            new_serialisable_info = ( serialisable_gugs, serialisable_gug_keys_to_display, serialisable_url_matches, serialisable_url_match_keys_to_display, serialisable_url_match_keys_to_parser_keys, serialisable_default_tag_import_options_tuple, serialisable_parsing_parsers, serialisable_network_contexts_to_custom_header_dicts )
            
            return ( 6, new_serialisable_info )
    
    def AddGUGs( self, new_gugs ):
        with self._lock:
            gugs = list( self._gugs )
            
            for gug in new_gugs:
                gug.SetNonDupeName( [ g.GetName() for g in gugs ] )
                
                gugs.append( gug )
        
        # SetGUGs takes the lock itself, so call it after releasing
        self.SetGUGs( gugs )
    
    def AddParsers( self, new_parsers ):
        with self._lock:
            parsers = list( self._parsers )
            
            for parser in new_parsers:
                parser.SetNonDupeName( [ p.GetName() for p in parsers ] )
                
                parsers.append( parser )
        
        self.SetParsers( parsers )
    
    def AddURLMatches( self, new_url_matches ):
        with self._lock:
            url_matches = list( self._url_matches )
            
            for url_match in new_url_matches:
                url_match.SetNonDupeName( [ u.GetName() for u in url_matches ] )
                
                url_matches.append( url_match )
        
        self.SetURLMatches( url_matches )
    
    def AlreadyHaveExactlyTheseHeaders( self, network_context, headers_list ):
        with self._lock:
            if network_context in self._network_contexts_to_custom_header_dicts:
                custom_headers_dict = self._network_contexts_to_custom_header_dicts[ network_context ]
                
                if len( headers_list ) != len( custom_headers_dict ):
                    return False
                
                for ( key, value, reason ) in headers_list:
                    if key not in custom_headers_dict:
                        return False
                    
                    ( existing_value, existing_approved, existing_reason ) = custom_headers_dict[ key ]
                    
                    if existing_value != value:
                        return False
                
                return True
        
        return False
    
    def AlreadyHaveExactlyThisGUG( self, new_gug ):
        with self._lock:
            # absent irrelevant variables, do we have the exact same object already in?
            gug_key_and_name = new_gug.GetGUGKeyAndName()
            
            dupe_gugs = [ gug.Duplicate() for gug in self._gugs ]
            
            for dupe_gug in dupe_gugs:
                dupe_gug.SetGUGKeyAndName( gug_key_and_name )
                
                if dupe_gug.DumpToString() == new_gug.DumpToString():
                    return True
        
        return False
    
    def AlreadyHaveExactlyThisParser( self, new_parser ):
        with self._lock:
            # absent irrelevant variables, do we have the exact same object already in?
            new_name = new_parser.GetName()
            new_parser_key = new_parser.GetParserKey()
            new_example_urls = new_parser.GetExampleURLs()
            new_example_parsing_context = new_parser.GetExampleParsingContext()
            
            dupe_parsers = [ ( parser.Duplicate(), parser ) for parser in self._parsers ]
            
            for ( dupe_parser, parser ) in dupe_parsers:
                dupe_parser.SetName( new_name )
                dupe_parser.SetParserKey( new_parser_key )
                dupe_parser.SetExampleURLs( new_example_urls )
                dupe_parser.SetExampleParsingContext( new_example_parsing_context )
                
                if dupe_parser.DumpToString() == new_parser.DumpToString():
                    # since these are the 'same', let's merge example urls
                    parser_example_urls = set( parser.GetExampleURLs() )
                    
                    parser_example_urls.update( new_example_urls )
                    
                    parser.SetExampleURLs( list( parser_example_urls ) )
                    
                    self._SetDirty()
                    
                    return True
        
        return False
    
    def AlreadyHaveExactlyThisURLMatch( self, new_url_match ):
        with self._lock:
            # absent irrelevant variables, do we have the exact same object already in?
            name = new_url_match.GetName()
            match_key = new_url_match.GetMatchKey()
            example_url = new_url_match.GetExampleURL()
            
            dupe_url_matches = [ url_match.Duplicate() for url_match in self._url_matches ]
            
            for dupe_url_match in dupe_url_matches:
                dupe_url_match.SetName( name )
                dupe_url_match.SetMatchKey( match_key )
                dupe_url_match.SetExampleURL( example_url )
                
                if dupe_url_match.DumpToString() == new_url_match.DumpToString():
                    return True
        
        return False
    
    def AutoAddDomainMetadatas( self, domain_metadatas, approved = False ):
        for domain_metadata in domain_metadatas:
            if not domain_metadata.HasHeaders():
                continue
            
            with self._lock:
                domain = domain_metadata.GetDomain()
                
                network_context = ClientNetworkingContexts.NetworkContext( CC.NETWORK_CONTEXT_DOMAIN, domain )
                
                headers_list = domain_metadata.GetHeaders()
                
                custom_headers_dict = { key : ( value, approved, reason ) for ( key, value, reason ) in headers_list }
                
                self._network_contexts_to_custom_header_dicts[ network_context ] = custom_headers_dict
    
    def AutoAddURLMatchesAndParsers( self, new_url_matches, dupe_url_matches, new_parsers ):
        for url_match in new_url_matches:
            url_match.RegenerateMatchKey()
        
        for parser in new_parsers:
            parser.RegenerateParserKey()
        
        # any existing url matches that already do the job of the new ones should be hung on to but renamed
        with self._lock:
            prefix = 'zzz - renamed due to auto-import - '
            
            renamees = []
            
            for existing_url_match in self._url_matches:
                if existing_url_match.GetName().startswith( prefix ):
                    continue
                
                for new_url_match in new_url_matches:
                    if new_url_match.Matches( existing_url_match.GetExampleURL() ) and existing_url_match.Matches( new_url_match.GetExampleURL() ):
                        # the url matches match each other, so they are doing the same job
                        renamees.append( existing_url_match )
                        
                        break
            
            for renamee in renamees:
                existing_names = [ url_match.GetName() for url_match in self._url_matches if url_match != renamee ]
                
                renamee.SetName( prefix + renamee.GetName() )
                renamee.SetNonDupeName( existing_names )
        
        self.AddURLMatches( new_url_matches )
        self.AddParsers( new_parsers )
        
        # we want to match these url matches and parsers together if possible
        with self._lock:
            url_matches_to_link = list( new_url_matches )
            
            # if the downloader adds an existing url match but an updated parser, we want to update the existing link
            for dupe_url_match in dupe_url_matches:
                # this is to make sure we have the right match keys for the link update in a minute
                actual_existing_dupe_url_match = self._GetURLMatch( dupe_url_match.GetExampleURL() )
                
                if actual_existing_dupe_url_match is not None:
                    url_matches_to_link.append( actual_existing_dupe_url_match )
            
            new_url_match_keys_to_parser_keys = NetworkDomainManager.STATICLinkURLMatchesAndParsers( url_matches_to_link, new_parsers, {} )
            
            self._url_match_keys_to_parser_keys.update( new_url_match_keys_to_parser_keys )
        
        # let's do a trytolink just in case there are loose ends due to some dupe being discarded earlier (e.g. url match is new, but parser was not)
        self.TryToLinkURLMatchesAndParsers()
    
    def CanValidateInPopup( self, network_contexts ):
        # we can always do this for headers
        
        return True
    
    def ConvertURLsToMediaViewerTuples( self, urls ):
        url_tuples = []
        
        with self._lock:
            for url in urls:
                url_match = self._GetURLMatch( url )
                
                if url_match is None:
                    # this plain-domain fallback for unmatched urls is currently disabled
                    if False:
                        domain = ConvertURLIntoDomain( url )
                        
                        url_tuples.append( ( domain, url ) )
                else:
                    url_match_key = url_match.GetMatchKey()
                    
                    if url_match_key in self._url_match_keys_to_display:
                        url_match_name = url_match.GetName()
                        
                        url_tuples.append( ( url_match_name, url ) )
                
                if len( url_tuples ) == 10:
                    break
        
        url_tuples.sort()
        
        return url_tuples
    
    def DeleteGUGs( self, deletee_names ):
        with self._lock:
            gugs = [ gug for gug in self._gugs if gug.GetName() not in deletee_names ]
        
        self.SetGUGs( gugs )
    
    def GenerateValidationPopupProcess( self, network_contexts ):
        with self._lock:
            header_tuples = []
            
            for network_context in network_contexts:
                if network_context in self._network_contexts_to_custom_header_dicts:
                    custom_header_dict = self._network_contexts_to_custom_header_dicts[ network_context ]
                    
                    for ( key, ( value, approved, reason ) ) in custom_header_dict.items():
                        if approved == VALID_UNKNOWN:
                            header_tuples.append( ( network_context, key, value, reason ) )
            
            process = DomainValidationPopupProcess( self, header_tuples )
            
            return process
    
    def GetDefaultGUGKeyAndName( self ):
        with self._lock:
            gug_key = HG.client_controller.new_options.GetKey( 'default_gug_key' )
            gug_name = HG.client_controller.new_options.GetString( 'default_gug_name' )
            
            return ( gug_key, gug_name )
    
    def GetDefaultTagImportOptions( self ):
        with self._lock:
            return ( self._file_post_default_tag_import_options, self._watchable_default_tag_import_options, self._url_match_keys_to_default_tag_import_options )
    
    def GetDefaultTagImportOptionsForPosts( self ):
        with self._lock:
            return self._file_post_default_tag_import_options.Duplicate()
    
    def GetDefaultTagImportOptionsForURL( self, url ):
        with self._lock:
            return self._GetDefaultTagImportOptionsForURL( url )
    
    def GetDownloader( self, url ):
        with self._lock:
            # this might be better as getdownloaderkey, but we'll see how it shakes out
            # might also be worth being a getifhasdownloader
            # match the url to a url_match, then look that up in a 'this downloader can handle this url_match type' dict that we'll manage
            pass
    
    def GetGUG( self, gug_key_and_name ):
        with self._lock:
            return self._GetGUG( gug_key_and_name )
    
    def GetGUGs( self ):
        with self._lock:
            return list( self._gugs )
    
    def GetGUGKeysToDisplay( self ):
        with self._lock:
            return set( self._gug_keys_to_display )
    
    def GetHeaders( self, network_contexts ):
        with self._lock:
            headers = {}
            
            for network_context in network_contexts:
                if network_context in self._network_contexts_to_custom_header_dicts:
                    custom_header_dict = self._network_contexts_to_custom_header_dicts[ network_context ]
                    
                    for ( key, ( value, approved, reason ) ) in custom_header_dict.items():
                        if approved == VALID_APPROVED:
                            headers[ key ] = value
            
            return headers
    
    def GetInitialSearchText( self, gug_key_and_name ):
        with self._lock:
            gug = self._GetGUG( gug_key_and_name )
            
            if gug is None:
                return 'unknown downloader'
            else:
                return gug.GetInitialSearchText()
    
    def GetNetworkContextsToCustomHeaderDicts( self ):
        with self._lock:
            return dict( self._network_contexts_to_custom_header_dicts )
    
    def GetParser( self, name ):
        with self._lock:
            for parser in self._parsers:
                if parser.GetName() == name:
                    return parser
        
        return None
    
    def GetParsers( self ):
        with self._lock:
            return list( self._parsers )
    
    def GetParserNamespaces( self ):
        with self._lock:
            return list( self._parser_namespaces )
    
    def GetShareableCustomHeaders( self, network_context ):
        with self._lock:
            headers_list = []
            
            if network_context in self._network_contexts_to_custom_header_dicts:
                custom_header_dict = self._network_contexts_to_custom_header_dicts[ network_context ]
                
                for ( key, ( value, approved, reason ) ) in custom_header_dict.items():
                    headers_list.append( ( key, value, reason ) )
            
            return headers_list
    
    def GetURLMatch( self, url ):
        with self._lock:
            return self._GetURLMatch( url )
    
    def GetURLMatches( self ):
        with self._lock:
            return list( self._url_matches )
    
    def GetURLMatchKeysToParserKeys( self ):
        with self._lock:
            return dict( self._url_match_keys_to_parser_keys )
    
    def GetURLMatchKeysToDisplay( self ):
        with self._lock:
            return set( self._url_match_keys_to_display )
    
    def GetURLParseCapability( self, url ):
        with self._lock:
            url_match = self._GetURLMatch( url )
            
            if url_match is None:
                return ( HC.URL_TYPE_UNKNOWN, 'unknown url', False )
            
            url_type = url_match.GetURLType()
            match_name = url_match.GetName()
            
            try:
                ( url_to_fetch, parser ) = self._GetURLToFetchAndParser( url )
                
                can_parse = True
            except HydrusExceptions.URLMatchException:
                can_parse = False
            
            return ( url_type, match_name, can_parse )
    
    def GetURLToFetchAndParser( self, url ):
        with self._lock:
            return self._GetURLToFetchAndParser( url )
    
    def HasCustomHeaders( self, network_context ):
        with self._lock:
            return network_context in self._network_contexts_to_custom_header_dicts and len( self._network_contexts_to_custom_header_dicts[ network_context ] ) > 0
    
    def Initialise( self ):
        self._RecalcCache()
    
    def IsDirty( self ):
        with self._lock:
            return self._dirty
    
    def IsValid( self, network_contexts ):
        # for now, let's say that denied headers are simply not added, not that they invalidate a query
        for network_context in network_contexts:
            if network_context in self._network_contexts_to_custom_header_dicts:
                custom_header_dict = self._network_contexts_to_custom_header_dicts[ network_context ]
                
                for ( value, approved, reason ) in custom_header_dict.values():
                    if approved == VALID_UNKNOWN:
                        return False
        
        return True
    
    def NormaliseURL( self, url ):
        with self._lock:
            url_match = self._GetURLMatch( url )
            
            if url_match is None:
                # no url class, so just alphabetise the query and put the url back together
                p = urlparse.urlparse( url )
                
                scheme = p.scheme
                netloc = p.netloc
                path = p.path
                params = p.params
                query = AlphabetiseQueryText( p.query )
                fragment = p.fragment
                
                r = urlparse.ParseResult( scheme, netloc, path, params, query, fragment )
                
                normalised_url = r.geturl()
            else:
                normalised_url = url_match.Normalise( url )
            
            return normalised_url
    
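    # e.g. (illustrative): with no matching url class, 'https://example.com/a?z=2&a=1'
    # normalises to 'https://example.com/a?a=1&z=2' via the query alphabetisation above.
    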
    def OverwriteDefaultGUGs( self, gug_names ):
        with self._lock:
            import ClientDefaults
            
            default_gugs = ClientDefaults.GetDefaultGUGs()
            
            for gug in default_gugs:
                gug.RegenerateGUGKey()
            
            existing_gugs = list( self._gugs )
            
            new_gugs = [ gug for gug in existing_gugs if gug.GetName() not in gug_names ]
            new_gugs.extend( [ gug for gug in default_gugs if gug.GetName() in gug_names ] )
        
        self.SetGUGs( new_gugs )
    
    def OverwriteDefaultParsers( self, parser_names ):
        with self._lock:
            import ClientDefaults
            
            default_parsers = ClientDefaults.GetDefaultParsers()
            
            for parser in default_parsers:
                parser.RegenerateParserKey()
            
            existing_parsers = list( self._parsers )
            
            new_parsers = [ parser for parser in existing_parsers if parser.GetName() not in parser_names ]
            new_parsers.extend( [ parser for parser in default_parsers if parser.GetName() in parser_names ] )
        
        self.SetParsers( new_parsers )
    
    def OverwriteDefaultURLMatches( self, url_match_names ):
        with self._lock:
            import ClientDefaults
            
            default_url_matches = ClientDefaults.GetDefaultURLMatches()
            
            for url_match in default_url_matches:
                url_match.RegenerateMatchKey()
            
            existing_url_matches = list( self._url_matches )
            
            new_url_matches = [ url_match for url_match in existing_url_matches if url_match.GetName() not in url_match_names ]
            new_url_matches.extend( [ url_match for url_match in default_url_matches if url_match.GetName() in url_match_names ] )
        
        self.SetURLMatches( new_url_matches )
    
    def OverwriteParserLink( self, url_match, parser ):
        with self._lock:
            url_match_key = url_match.GetMatchKey()
            parser_key = parser.GetParserKey()
            
            self._url_match_keys_to_parser_keys[ url_match_key ] = parser_key
    
    def SetClean( self ):
        with self._lock:
            self._dirty = False
    
    def SetDefaultGUGKeyAndName( self, gug_key_and_name ):
        with self._lock:
            ( gug_key, gug_name ) = gug_key_and_name
            
            HG.client_controller.new_options.SetKey( 'default_gug_key', gug_key )
            HG.client_controller.new_options.SetString( 'default_gug_name', gug_name )
    
    def SetDefaultTagImportOptions( self, file_post_default_tag_import_options, watchable_default_tag_import_options, url_match_keys_to_tag_import_options ):
        with self._lock:
            self._file_post_default_tag_import_options = file_post_default_tag_import_options
            self._watchable_default_tag_import_options = watchable_default_tag_import_options
            
            self._url_match_keys_to_default_tag_import_options = url_match_keys_to_tag_import_options
            
            self._SetDirty()
    
    def SetGUGs( self, gugs ):
        with self._lock:
            # by default, we will show new gugs
            old_gug_keys = { gug.GetGUGKey() for gug in self._gugs }
            gug_keys = { gug.GetGUGKey() for gug in gugs }
            
            added_gug_keys = gug_keys.difference( old_gug_keys )
            
            self._gug_keys_to_display.update( added_gug_keys )
            
            #
            
            self._gugs = HydrusSerialisable.SerialisableList( gugs )
            
            self._RecalcCache()
            
            self._SetDirty()
    
    def SetGUGKeysToDisplay( self, gug_keys_to_display ):
        with self._lock:
            self._gug_keys_to_display = set()
            self._gug_keys_to_display.update( gug_keys_to_display )
            
            self._SetDirty()
    
    def SetHeaderValidation( self, network_context, key, approved ):
        with self._lock:
            if network_context in self._network_contexts_to_custom_header_dicts:
                custom_header_dict = self._network_contexts_to_custom_header_dicts[ network_context ]
                
                if key in custom_header_dict:
                    ( value, old_approved, reason ) = custom_header_dict[ key ]
                    
                    custom_header_dict[ key ] = ( value, approved, reason )
            
            self._SetDirty()
    
    def SetNetworkContextsToCustomHeaderDicts( self, network_contexts_to_custom_header_dicts ):
        with self._lock:
            self._network_contexts_to_custom_header_dicts = network_contexts_to_custom_header_dicts
            
            self._SetDirty()
    
    def SetParsers( self, parsers ):
        with self._lock:
            self._parsers = HydrusSerialisable.SerialisableList()
            
            self._parsers.extend( parsers )
            self._parsers.sort( key = lambda p: p.GetName() )
            
            # delete orphans
            
            parser_keys = { parser.GetParserKey() for parser in parsers }
            
            deletee_url_match_keys = set()
            
            for ( url_match_key, parser_key ) in self._url_match_keys_to_parser_keys.items():
                if parser_key not in parser_keys:
                    deletee_url_match_keys.add( url_match_key )
            
            for deletee_url_match_key in deletee_url_match_keys:
                del self._url_match_keys_to_parser_keys[ deletee_url_match_key ]
            
            #
            
            self._RecalcCache()
            
            self._SetDirty()
    
    def SetURLMatches( self, url_matches ):
        with self._lock:
            # by default, we will show post urls
            
            old_post_url_match_keys = { url_match.GetMatchKey() for url_match in self._url_matches if url_match.IsPostURL() }
            post_url_match_keys = { url_match.GetMatchKey() for url_match in url_matches if url_match.IsPostURL() }
            
            added_post_url_match_keys = post_url_match_keys.difference( old_post_url_match_keys )
            
            self._url_match_keys_to_display.update( added_post_url_match_keys )
            
            #
            
            self._url_matches = HydrusSerialisable.SerialisableList()
            
            self._url_matches.extend( url_matches )
            self._url_matches.sort( key = lambda u: u.GetName() )
            
            # delete orphans
            
            url_match_keys = { url_match.GetMatchKey() for url_match in url_matches }
            
            self._url_match_keys_to_display.intersection_update( url_match_keys )
            
            for deletee_key in set( self._url_match_keys_to_parser_keys.keys() ).difference( url_match_keys ):
                del self._url_match_keys_to_parser_keys[ deletee_key ]
            
            # any url matches that link to another via the API conversion will not be using parsers
            
            url_match_api_pairs = ConvertURLMatchesIntoAPIPairs( self._url_matches )
            
            for ( url_match_original, url_match_api ) in url_match_api_pairs:
                url_match_key = url_match_original.GetMatchKey()
                
                if url_match_key in self._url_match_keys_to_parser_keys:
                    del self._url_match_keys_to_parser_keys[ url_match_key ]
            
            self._RecalcCache()
            
            self._SetDirty()
    
    def SetURLMatchKeysToParserKeys( self, url_match_keys_to_parser_keys ):
        with self._lock:
            self._url_match_keys_to_parser_keys = HydrusSerialisable.SerialisableBytesDictionary()
            
            self._url_match_keys_to_parser_keys.update( url_match_keys_to_parser_keys )
            
            self._SetDirty()
    
    def SetURLMatchKeysToDisplay( self, url_match_keys_to_display ):
        with self._lock:
            self._url_match_keys_to_display = set()
            self._url_match_keys_to_display.update( url_match_keys_to_display )
            
            self._SetDirty()
    
    def ShouldAssociateURLWithFiles( self, url ):
        with self._lock:
            url_match = self._GetURLMatch( url )
            
            if url_match is None:
                return True
            
            return url_match.ShouldAssociateWithFiles()
    
    def TryToLinkURLMatchesAndParsers( self ):
        with self._lock:
            new_url_match_keys_to_parser_keys = NetworkDomainManager.STATICLinkURLMatchesAndParsers( self._url_matches, self._parsers, self._url_match_keys_to_parser_keys )
            
            self._url_match_keys_to_parser_keys.update( new_url_match_keys_to_parser_keys )
            
            self._SetDirty()
    
    def URLCanReferToMultipleFiles( self, url ):
        with self._lock:
            url_match = self._GetURLMatch( url )
            
            if url_match is None:
                return False
            
            return url_match.CanReferToMultipleFiles()
    
    def URLDefinitelyRefersToOneFile( self, url ):
        with self._lock:
            url_match = self._GetURLMatch( url )
            
            if url_match is None:
                return False
            
            return url_match.RefersToOneFile()
    
    @staticmethod
    def STATICLinkURLMatchesAndParsers( url_matches, parsers, existing_url_match_keys_to_parser_keys ):
        url_matches = list( url_matches )
        
        NetworkDomainManager.STATICSortURLMatchesDescendingComplexity( url_matches )
        
        parsers = list( parsers )
        
        parsers.sort( key = lambda p: p.GetName() )
        
        new_url_match_keys_to_parser_keys = {}
        
        api_pairs = ConvertURLMatchesIntoAPIPairs( url_matches )
        
        # anything that goes to an api url will be parsed by that api's parser--it can't have its own
        api_pair_unparsable_url_matches = set()
        
        for ( a, b ) in api_pairs:
            api_pair_unparsable_url_matches.add( a )
        
        #
        
        # I have to do this backwards, going through parsers and then url_matches, so I can do a proper url match lookup like the real domain manager does it
        # otherwise, if we iterate through url matches looking for parsers to match them, we have gallery url matches thinking they match parser post urls
        # e.g.
        # the page parser might say it supports https://danbooru.donmai.us/posts/3198277
        # but the gallery url class might think it recognises that as https://danbooru.donmai.us/posts
        #
        # so we have to do the normal lookup in the proper descending complexity order, not searching any further than the first, correct match
        
        for parser in parsers:
            example_urls = parser.GetExampleURLs()
            
            for example_url in example_urls:
                for url_match in url_matches:
                    if url_match.Matches( example_url ):
                        # we have a match. this is the 'correct' match for this example url, and we should not search any more, so we break below
                        url_match_key = url_match.GetMatchKey()
                        
                        parsable = url_match.IsParsable()
                        linkable = url_match_key not in existing_url_match_keys_to_parser_keys and url_match_key not in new_url_match_keys_to_parser_keys
                        
                        if parsable and linkable:
                            new_url_match_keys_to_parser_keys[ url_match_key ] = parser.GetParserKey()
                        
                        break
        
        '''
        #
        
        for url_match in url_matches:
            
            if not url_match.IsParsable() or url_match in api_pair_unparsable_url_matches:
                
                continue
                
            
            url_match_key = url_match.GetMatchKey()
            
            if url_match_key in existing_url_match_keys_to_parser_keys:
                
                continue
                
            
            for parser in parsers:
                
                example_urls = parser.GetExampleURLs()
                
                if True in ( url_match.Matches( example_url ) for example_url in example_urls ):
                    
                    new_url_match_keys_to_parser_keys[ url_match_key ] = parser.GetParserKey()
                    
                    break
                
            
        '''
        
        return new_url_match_keys_to_parser_keys
    
    @staticmethod
    def STATICSortURLMatchesDescendingComplexity( url_matches ):
        # we sort them in descending complexity so that
        # post url/manga subpage
        # is before
        # post url
        # also, put more 'precise' URL types above the more typically permissive, in the order:
        # file
        # post
        # gallery/watchable
        # since we are sorting in reverse, a higher number means more precise
        
        def key( u_m ):
            u_t = u_m.GetURLType()
            
            if u_t == HC.URL_TYPE_FILE:
                u_t_precision_value = 2
            elif u_t == HC.URL_TYPE_POST:
                u_t_precision_value = 1
            else:
                u_t_precision_value = 0
            
            u_e = u_m.GetExampleURL()
            
            return ( u_t_precision_value, u_e.count( '/' ), u_e.count( '=' ) )
        
        url_matches.sort( key = key, reverse = True )
    
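# To illustrate the sort key above (a sketch): a file url class with example url
# 'https://example.com/file/123' gets the key ( 2, 4, 0 ), while a gallery url class with
# 'https://example.com/search?q=samus&page=1' gets ( 0, 3, 2 ). Reverse-sorting these
# tuples puts the file url class first and more complex paths above simpler ones.
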
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_NETWORK_DOMAIN_MANAGER ] = NetworkDomainManager
class DomainMetadataPackage( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_DOMAIN_METADATA_PACKAGE
SERIALISABLE_NAME = 'Domain Metadata'
SERIALISABLE_VERSION = 1
def __init__( self, domain = None, headers_list = None, bandwidth_rules = None ):
HydrusSerialisable.SerialisableBase.__init__( self )
if domain is None:
domain = 'example.com'
self._domain = domain
self._headers_list = headers_list
self._bandwidth_rules = bandwidth_rules
def _GetSerialisableInfo( self ):
if self._bandwidth_rules is None:
serialisable_bandwidth_rules = self._bandwidth_rules
else:
serialisable_bandwidth_rules = self._bandwidth_rules.GetSerialisableTuple()
return ( self._domain, self._headers_list, serialisable_bandwidth_rules )
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
( self._domain, self._headers_list, serialisable_bandwidth_rules ) = serialisable_info
if serialisable_bandwidth_rules is None:
self._bandwidth_rules = serialisable_bandwidth_rules
else:
self._bandwidth_rules = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_bandwidth_rules )
def GetBandwidthRules( self ):
return self._bandwidth_rules
def GetDetailedSafeSummary( self ):
components = [ 'For domain "' + self._domain + '":' ]
if self.HasBandwidthRules():
m = 'Bandwidth rules: '
m += os.linesep
m += os.linesep.join( [ HydrusNetworking.ConvertBandwidthRuleToString( rule ) for rule in self._bandwidth_rules.GetRules() ] )
components.append( m )
if self.HasHeaders():
m = 'Headers: '
m += os.linesep
m += os.linesep.join( [ key + ' : ' + value + ' - ' + reason for ( key, value, reason ) in self._headers_list ] )
components.append( m )
joiner = os.linesep * 2
s = joiner.join( components )
return s
def GetDomain( self ):
return self._domain
def GetHeaders( self ):
return self._headers_list
def GetSafeSummary( self ):
components = []
if self.HasBandwidthRules():
components.append( 'bandwidth rules' )
if self.HasHeaders():
components.append( 'headers' )
return ' and '.join( components ) + ' - ' + self._domain
def HasBandwidthRules( self ):
return self._bandwidth_rules is not None
def HasHeaders( self ):
return self._headers_list is not None
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_DOMAIN_METADATA_PACKAGE ] = DomainMetadataPackage
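
# A minimal usage sketch, not part of the original module: a metadata package
# carries optional headers and bandwidth rules for a domain. The header tuple
# here is a hypothetical example; no bandwidth rules are attached.
def _example_domain_metadata_sketch():
    
    headers_list = [ ( 'User-Agent', 'hydrus client', 'some sites reject the default agent' ) ]
    
    package = DomainMetadataPackage( domain = 'example.com', headers_list = headers_list )
    
    # with headers but no bandwidth rules, this returns 'headers - example.com'
    return package.GetSafeSummary()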

class DomainValidationPopupProcess( object ):
    
    def __init__( self, domain_manager, header_tuples ):
        
        self._domain_manager = domain_manager
        self._header_tuples = header_tuples
        
        self._is_done = False
    
    def IsDone( self ):
        
        return self._is_done
    
    def Start( self ):
        
        try:
            
            results = []
            
            for ( network_context, key, value, reason ) in self._header_tuples:
                
                job_key = ClientThreading.JobKey()
                
                # generate question
                
                question = 'For the network context ' + network_context.ToUnicode() + ', can the client set this header?'
                question += os.linesep * 2
                question += key + ': ' + value
                question += os.linesep * 2
                question += reason
                
                job_key.SetVariable( 'popup_yes_no_question', question )
                
                HG.client_controller.pub( 'message', job_key )
                
                result = job_key.GetIfHasVariable( 'popup_yes_no_answer' )
                
                while result is None:
                    
                    if HG.view_shutdown:
                        return
                    
                    time.sleep( 0.25 )
                    
                    result = job_key.GetIfHasVariable( 'popup_yes_no_answer' )
                
                if result:
                    approved = VALID_APPROVED
                else:
                    approved = VALID_DENIED
                
                self._domain_manager.SetHeaderValidation( network_context, key, approved )
            
        finally:
            
            self._is_done = True
    

GALLERY_INDEX_TYPE_PATH_COMPONENT = 0
GALLERY_INDEX_TYPE_PARAMETER = 1

class GalleryURLGenerator( HydrusSerialisable.SerialisableBaseNamed ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_URL_GENERATOR
    SERIALISABLE_NAME = 'Gallery URL Generator'
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name, gug_key = None, url_template = None, replacement_phrase = None, search_terms_separator = None, initial_search_text = None, example_search_text = None ):
        
        if gug_key is None:
            gug_key = HydrusData.GenerateKey()
        
        if url_template is None:
            url_template = 'https://example.com/search?q=%tags%&index=0'
        
        if replacement_phrase is None:
            replacement_phrase = '%tags%'
        
        if search_terms_separator is None:
            search_terms_separator = '+'
        
        if initial_search_text is None:
            initial_search_text = 'search tags'
        
        if example_search_text is None:
            example_search_text = 'blue_eyes blonde_hair'
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._gallery_url_generator_key = gug_key
        self._url_template = url_template
        self._replacement_phrase = replacement_phrase
        self._search_terms_separator = search_terms_separator
        self._initial_search_text = initial_search_text
        self._example_search_text = example_search_text
    
    def _GetSerialisableInfo( self ):
        
        serialisable_gallery_url_generator_key = self._gallery_url_generator_key.encode( 'hex' )
        
        return ( serialisable_gallery_url_generator_key, self._url_template, self._replacement_phrase, self._search_terms_separator, self._initial_search_text, self._example_search_text )
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( serialisable_gallery_url_generator_key, self._url_template, self._replacement_phrase, self._search_terms_separator, self._initial_search_text, self._example_search_text ) = serialisable_info
        
        self._gallery_url_generator_key = serialisable_gallery_url_generator_key.decode( 'hex' )
    
    def GenerateGalleryURL( self, query_text ):
        
        if self._replacement_phrase == '':
            raise HydrusExceptions.GUGException( 'No replacement phrase!' )
        
        if self._replacement_phrase not in self._url_template:
            raise HydrusExceptions.GUGException( 'Replacement phrase not in URL template!' )
        
        ( first_part, second_part ) = self._url_template.split( self._replacement_phrase, 1 )
        
        search_phrase_seems_to_go_in_path = '?' not in first_part
        
        search_terms = query_text.split( ' ' )
        
        if search_phrase_seems_to_go_in_path:
            
            # encode this gubbins here, since requests won't be able to do it for us
            # this basically fixes e621 searches for 'male/female', which through some httpconf trickery are embedded in the path but end up in a query, so they need to be encoded right beforehand
            
            # we need ToByteString as urllib.quote can't handle unicode hiragana etc...
            
            encoded_search_terms = [ urllib.quote( HydrusData.ToByteString( search_term ), safe = '' ) for search_term in search_terms ]
            
        else:
            
            # when the separator is '+' but a permitted tag might be '6+girls', we run into fun internet land
            
            encoded_search_terms = []
            
            for search_term in search_terms:
                
                if self._search_terms_separator in search_term:
                    search_term = urllib.quote( HydrusData.ToByteString( search_term ), safe = '' )
                
                encoded_search_terms.append( search_term )
            
        
        try:
            
            search_phrase = self._search_terms_separator.join( encoded_search_terms )
            
            gallery_url = self._url_template.replace( self._replacement_phrase, search_phrase )
            
        except Exception as e:
            
            raise HydrusExceptions.GUGException( unicode( e ) )
        
        return gallery_url
    
    def GenerateGalleryURLs( self, query_text ):
        
        return ( self.GenerateGalleryURL( query_text ), )
    
    def GetExampleURL( self ):
        
        return self.GenerateGalleryURL( self._example_search_text )
    
    def GetExampleURLs( self ):
        
        return ( self.GetExampleURL(), )
    
    def GetGUGKey( self ):
        
        return self._gallery_url_generator_key
    
    def GetGUGKeyAndName( self ):
        
        return ( self._gallery_url_generator_key, self._name )
    
    def GetInitialSearchText( self ):
        
        return self._initial_search_text
    
    def GetSafeSummary( self ):
        
        return 'Downloader "' + self._name + '" - ' + ConvertURLIntoDomain( self.GetExampleURL() )
    
    def GetURLTemplateVariables( self ):
        
        return ( self._url_template, self._replacement_phrase, self._search_terms_separator, self._example_search_text )
    
    def SetGUGKeyAndName( self, gug_key_and_name ):
        
        ( gug_key, name ) = gug_key_and_name
        
        self._gallery_url_generator_key = gug_key
        self._name = name
    
    def IsFunctional( self ):
        
        try:
            
            example_url = self.GetExampleURL()
            
            ( url_type, match_name, can_parse ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( example_url )
            
        except:
            
            return False
        
        return can_parse
    
    def RegenerateGUGKey( self ):
        
        self._gallery_url_generator_key = HydrusData.GenerateKey()
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_URL_GENERATOR ] = GalleryURLGenerator
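
# A minimal usage sketch, not part of the original module: with the default
# template, search terms are joined by the separator and substituted for the
# replacement phrase. The gug name here is a hypothetical example.
def _example_gug_sketch():
    
    gug = GalleryURLGenerator( 'example search' )
    
    # the default template is 'https://example.com/search?q=%tags%&index=0', so
    # this should produce 'https://example.com/search?q=blue_eyes+blonde_hair&index=0'
    return gug.GenerateGalleryURL( 'blue_eyes blonde_hair' )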

class NestedGalleryURLGenerator( HydrusSerialisable.SerialisableBaseNamed ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_NESTED_GALLERY_URL_GENERATOR
    SERIALISABLE_NAME = 'Nested Gallery URL Generator'
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name, gug_key = None, initial_search_text = None, gug_keys_and_names = None ):
        
        if gug_key is None:
            gug_key = HydrusData.GenerateKey()
        
        if initial_search_text is None:
            initial_search_text = 'search tags'
        
        if gug_keys_and_names is None:
            gug_keys_and_names = []
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._gallery_url_generator_key = gug_key
        self._initial_search_text = initial_search_text
        self._gug_keys_and_names = gug_keys_and_names
    
    def _GetSerialisableInfo( self ):
        
        serialisable_gug_key = self._gallery_url_generator_key.encode( 'hex' )
        serialisable_gug_keys_and_names = [ ( gug_key.encode( 'hex' ), gug_name ) for ( gug_key, gug_name ) in self._gug_keys_and_names ]
        
        return ( serialisable_gug_key, self._initial_search_text, serialisable_gug_keys_and_names )
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( serialisable_gug_key, self._initial_search_text, serialisable_gug_keys_and_names ) = serialisable_info
        
        self._gallery_url_generator_key = serialisable_gug_key.decode( 'hex' )
        self._gug_keys_and_names = [ ( gug_key.decode( 'hex' ), gug_name ) for ( gug_key, gug_name ) in serialisable_gug_keys_and_names ]
    
    def GenerateGalleryURLs( self, query_text ):
        
        gallery_urls = []
        
        for gug_key_and_name in self._gug_keys_and_names:
            
            gug = HG.client_controller.network_engine.domain_manager.GetGUG( gug_key_and_name )
            
            if gug is not None:
                gallery_urls.append( gug.GenerateGalleryURL( query_text ) )
        
        return gallery_urls
    
    def GetExampleURLs( self ):
        
        example_urls = []
        
        for gug_key_and_name in self._gug_keys_and_names:
            
            gug = HG.client_controller.network_engine.domain_manager.GetGUG( gug_key_and_name )
            
            if gug is not None:
                example_urls.append( gug.GetExampleURL() )
        
        return example_urls
    
    def GetGUGKey( self ):
        
        return self._gallery_url_generator_key
    
    def GetGUGKeys( self ):
        
        return [ gug_key for ( gug_key, gug_name ) in self._gug_keys_and_names ]
    
    def GetGUGKeysAndNames( self ):
        
        return list( self._gug_keys_and_names )
    
    def GetGUGKeyAndName( self ):
        
        return ( self._gallery_url_generator_key, self._name )
    
    def GetGUGNames( self ):
        
        return [ gug_name for ( gug_key, gug_name ) in self._gug_keys_and_names ]
    
    def GetInitialSearchText( self ):
        
        return self._initial_search_text
    
    def GetSafeSummary( self ):
        
        return 'Nested downloader "' + self._name + '" - ' + ', '.join( ( name for ( gug_key, name ) in self._gug_keys_and_names ) )
    
    def IsFunctional( self ):
        
        for gug_key_and_name in self._gug_keys_and_names:
            
            gug = HG.client_controller.network_engine.domain_manager.GetGUG( gug_key_and_name )
            
            if gug is not None and gug.IsFunctional():
                return True
        
        return False
    
    def RegenerateGUGKey( self ):
        
        self._gallery_url_generator_key = HydrusData.GenerateKey()
    
    def RepairGUGs( self, available_gugs ):
        
        available_keys_to_gugs = { gug.GetGUGKey() : gug for gug in available_gugs }
        available_names_to_gugs = { gug.GetName() : gug for gug in available_gugs }
        
        good_gug_keys_and_names = []
        
        for ( gug_key, gug_name ) in self._gug_keys_and_names:
            
            if gug_key in available_keys_to_gugs:
                gug = available_keys_to_gugs[ gug_key ]
            elif gug_name in available_names_to_gugs:
                gug = available_names_to_gugs[ gug_name ]
            else:
                continue
            
            good_gug_keys_and_names.append( ( gug.GetGUGKey(), gug.GetName() ) )
        
        self._gug_keys_and_names = good_gug_keys_and_names
    
    def SetGUGKeyAndName( self, gug_key_and_name ):
        
        ( gug_key, name ) = gug_key_and_name
        
        self._gallery_url_generator_key = gug_key
        self._name = name
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_NESTED_GALLERY_URL_GENERATOR ] = NestedGalleryURLGenerator
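
# A minimal usage sketch, not part of the original module: RepairGUGs relinks a
# nested gug's ( key, name ) pairs against the gugs actually available, matching
# first by key and then by name. The stale key here is a hypothetical example.
def _example_repair_gugs_sketch():
    
    gug = GalleryURLGenerator( 'example search' )
    
    stale_pair = ( HydrusData.GenerateKey(), 'example search' ) # wrong key, right name
    
    ngug = NestedGalleryURLGenerator( 'example nested search', gug_keys_and_names = [ stale_pair ] )
    
    ngug.RepairGUGs( [ gug ] )
    
    # the pair is now ( the real gug's key, 'example search' )
    return ngug.GetGUGKeysAndNames()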

class URLMatch( HydrusSerialisable.SerialisableBaseNamed ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_MATCH
    SERIALISABLE_NAME = 'URL Class'
    SERIALISABLE_VERSION = 6
    
    def __init__( self, name, url_match_key = None, url_type = None, preferred_scheme = 'https', netloc = 'hostname.com', match_subdomains = False, keep_matched_subdomains = False, path_components = None, parameters = None, api_lookup_converter = None, can_produce_multiple_files = False, should_be_associated_with_files = True, gallery_index_type = None, gallery_index_identifier = None, gallery_index_delta = 1, example_url = 'https://hostname.com/post/page.php?id=123456&s=view' ):
        
        if url_match_key is None:
            url_match_key = HydrusData.GenerateKey()
        
        if url_type is None:
            url_type = HC.URL_TYPE_POST
        
        if path_components is None:
            
            path_components = []
            
            path_components.append( ( ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FIXED, match_value = 'post', example_string = 'post' ), None ) )
            path_components.append( ( ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FIXED, match_value = 'page.php', example_string = 'page.php' ), None ) )
        
        if parameters is None:
            
            parameters = {}
            
            parameters[ 's' ] = ( ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ), None )
            parameters[ 'id' ] = ( ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FLEXIBLE, match_value = ClientParsing.NUMERIC, example_string = '123456' ), None )
            parameters[ 'page' ] = ( ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FLEXIBLE, match_value = ClientParsing.NUMERIC, example_string = '1' ), '1' )
        
        if api_lookup_converter is None:
            api_lookup_converter = ClientParsing.StringConverter( example_string = 'https://hostname.com/post/page.php?id=123456&s=view' )
        
        # if the args are not serialisable stuff, let's overwrite them here
        
        path_components = HydrusSerialisable.SerialisableList( path_components )
        parameters = HydrusSerialisable.SerialisableDictionary( parameters )
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._url_match_key = url_match_key
        self._url_type = url_type
        self._preferred_scheme = preferred_scheme
        self._netloc = netloc
        
        self._match_subdomains = match_subdomains
        self._keep_matched_subdomains = keep_matched_subdomains
        
        self._can_produce_multiple_files = can_produce_multiple_files
        self._should_be_associated_with_files = should_be_associated_with_files
        
        self._path_components = path_components
        self._parameters = parameters
        
        self._api_lookup_converter = api_lookup_converter
        
        self._gallery_index_type = gallery_index_type
        self._gallery_index_identifier = gallery_index_identifier
        self._gallery_index_delta = gallery_index_delta
        
        self._example_url = example_url
    
    def _ClipNetLoc( self, netloc ):
        
        if self._keep_matched_subdomains:
            
            # for domains like artistname.website.com, where removing the subdomain may break the url, we leave it alone
            
            pass
            
        else:
            
            # for domains like mediaserver4.website.com, where multiple subdomains serve the same content as the larger site
            
            if not DomainEqualsAnotherForgivingWWW( netloc, self._netloc ):
                netloc = self._netloc
        
        return netloc
    
    def _ClipAndFleshOutPath( self, path, allow_clip = True ):
        
        # /post/show/1326143/akunim-anthro-armband-armwear-clothed-clothing-fem
        
        while path.startswith( '/' ):
            path = path[ 1 : ]
        
        # post/show/1326143/akunim-anthro-armband-armwear-clothed-clothing-fem
        
        path_components = path.split( '/' )
        
        if allow_clip or len( path_components ) < len( self._path_components ):
            
            clipped_path_components = []
            
            for ( index, ( string_match, default ) ) in enumerate( self._path_components ):
                
                if len( path_components ) > index: # the given path has the value
                    clipped_path_component = path_components[ index ]
                elif default is not None:
                    clipped_path_component = default
                else:
                    raise HydrusExceptions.URLMatchException( 'Could not clip path--given url appeared to be too short!' )
                
                clipped_path_components.append( clipped_path_component )
            
            path = '/'.join( clipped_path_components )
        
        # post/show/1326143
        
        if len( path ) > 0:
            path = '/' + path
        
        # /post/show/1326143
        
        return path
    
    def _ClipAndFleshOutQuery( self, query, allow_clip = True ):
        
        query_dict = ConvertQueryTextToDict( query )
        
        if allow_clip:
            query_dict = { key : value for ( key, value ) in query_dict.items() if key in self._parameters }
        
        for ( key, ( string_match, default ) ) in self._parameters.items():
            
            if key not in query_dict:
                
                if default is None:
                    raise HydrusExceptions.URLMatchException( 'Could not flesh out query--no default for ' + key + ' defined!' )
                else:
                    query_dict[ key ] = default
            
        
        query = ConvertQueryDictToText( query_dict )
        
        return query
    
    def _GetSerialisableInfo( self ):
        
        serialisable_url_match_key = self._url_match_key.encode( 'hex' )
        
        serialisable_path_components = [ ( string_match.GetSerialisableTuple(), default ) for ( string_match, default ) in self._path_components ]
        serialisable_parameters = [ ( key, ( string_match.GetSerialisableTuple(), default ) ) for ( key, ( string_match, default ) ) in self._parameters.items() ]
        
        serialisable_api_lookup_converter = self._api_lookup_converter.GetSerialisableTuple()
        
        return ( serialisable_url_match_key, self._url_type, self._preferred_scheme, self._netloc, self._match_subdomains, self._keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, self._can_produce_multiple_files, self._should_be_associated_with_files, self._gallery_index_type, self._gallery_index_identifier, self._gallery_index_delta, self._example_url )
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( serialisable_url_match_key, self._url_type, self._preferred_scheme, self._netloc, self._match_subdomains, self._keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, self._can_produce_multiple_files, self._should_be_associated_with_files, self._gallery_index_type, self._gallery_index_identifier, self._gallery_index_delta, self._example_url ) = serialisable_info
        
        self._url_match_key = serialisable_url_match_key.decode( 'hex' )
        
        self._path_components = [ ( HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_match ), default ) for ( serialisable_string_match, default ) in serialisable_path_components ]
        self._parameters = { key : ( HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_match ), default ) for ( key, ( serialisable_string_match, default ) ) in serialisable_parameters }
        
        self._api_lookup_converter = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_api_lookup_converter )
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        
        if version == 1:
            
            ( url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, example_url ) = old_serialisable_info
            
            url_match_key = HydrusData.GenerateKey()
            
            serialisable_url_match_key = url_match_key.encode( 'hex' )
            
            api_lookup_converter = ClientParsing.StringConverter( example_string = example_url )
            
            serialisable_api_lookup_converter = api_lookup_converter.GetSerialisableTuple()
            
            new_serialisable_info = ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, example_url )
            
            return ( 2, new_serialisable_info )
        
        if version == 2:
            
            ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, example_url ) = old_serialisable_info
            
            if url_type in ( HC.URL_TYPE_FILE, HC.URL_TYPE_POST ):
                should_be_associated_with_files = True
            else:
                should_be_associated_with_files = False
            
            new_serialisable_info = ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, should_be_associated_with_files, example_url )
            
            return ( 3, new_serialisable_info )
        
        if version == 3:
            
            ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, should_be_associated_with_files, example_url ) = old_serialisable_info
            
            can_produce_multiple_files = False
            
            new_serialisable_info = ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, example_url )
            
            return ( 4, new_serialisable_info )
        
        if version == 4:
            
            ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, example_url ) = old_serialisable_info
            
            gallery_index_type = None
            gallery_index_identifier = None
            gallery_index_delta = 1
            
            new_serialisable_info = ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
            
            return ( 5, new_serialisable_info )
        
        if version == 5:
            
            ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url ) = old_serialisable_info
            
            path_components = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_path_components )
            parameters = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_parameters )
            
            # path components and parameters gain an optional default value in version 6
            
            path_components = [ ( value, None ) for value in path_components ]
            parameters = { key : ( value, None ) for ( key, value ) in parameters.items() }
            
            serialisable_path_components = [ ( string_match.GetSerialisableTuple(), default ) for ( string_match, default ) in path_components ]
            serialisable_parameters = [ ( key, ( string_match.GetSerialisableTuple(), default ) ) for ( key, ( string_match, default ) ) in parameters.items() ]
            
            new_serialisable_info = ( serialisable_url_match_key, url_type, preferred_scheme, netloc, match_subdomains, keep_matched_subdomains, serialisable_path_components, serialisable_parameters, serialisable_api_lookup_converter, can_produce_multiple_files, should_be_associated_with_files, gallery_index_type, gallery_index_identifier, gallery_index_delta, example_url )
            
            return ( 6, new_serialisable_info )
    
    def CanGenerateNextGalleryPage( self ):
        
        if self._url_type == HC.URL_TYPE_GALLERY:
            
            if self._gallery_index_type is not None:
                return True
            
        
        return False
    
    def CanReferToMultipleFiles( self ):
        
        is_a_gallery_page = self._url_type in ( HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE )
        is_a_multipost_post_page = self._url_type == HC.URL_TYPE_POST and self._can_produce_multiple_files
        
        return is_a_gallery_page or is_a_multipost_post_page
    
    def ClippingIsAppropriate( self ):
        
        return self._should_be_associated_with_files or self.UsesAPIURL()
    
    def GetAPIURL( self, url = None ):
        
        if url is None:
            url = self._example_url
        
        url = self.Normalise( url )
        
        return self._api_lookup_converter.Convert( url )
    
    def GetDomain( self ):
        
        return ConvertDomainIntoSecondLevelDomain( HydrusData.ToByteString( self._netloc ) )
    
    def GetExampleURL( self ):
        
        return self._example_url
    
    def GetGalleryIndexValues( self ):
        
        return ( self._gallery_index_type, self._gallery_index_identifier, self._gallery_index_delta )
    
    def GetMatchKey( self ):
        
        return self._url_match_key
    
    def GetNextGalleryPage( self, url ):
        
        url = self.Normalise( url )
        
        p = urlparse.urlparse( url )
        
        scheme = p.scheme
        netloc = p.netloc
        path = p.path
        query = p.query
        
        params = ''
        fragment = ''
        
        if self._gallery_index_type == GALLERY_INDEX_TYPE_PATH_COMPONENT:
            
            page_index_path_component_index = self._gallery_index_identifier
            
            while path.startswith( '/' ):
                path = path[ 1 : ]
            
            path_components = path.split( '/' )
            
            try:
                page_index = path_components[ page_index_path_component_index ]
            except IndexError:
                raise HydrusExceptions.URLMatchException( 'Could not generate next gallery page--not enough path components!' )
            
            try:
                page_index = int( page_index )
            except:
                raise HydrusExceptions.URLMatchException( 'Could not generate next gallery page--index component was not an integer!' )
            
            path_components[ page_index_path_component_index ] = str( page_index + self._gallery_index_delta )
            
            path = '/' + '/'.join( path_components )
            
        elif self._gallery_index_type == GALLERY_INDEX_TYPE_PARAMETER:
            
            page_index_name = self._gallery_index_identifier
            
            query_dict = ConvertQueryTextToDict( query )
            
            if page_index_name not in query_dict:
                raise HydrusExceptions.URLMatchException( 'Could not generate next gallery page--did not find ' + str( self._gallery_index_identifier ) + ' in parameters!' )
            
            page_index = query_dict[ page_index_name ]
            
            try:
                page_index = int( page_index )
            except:
                raise HydrusExceptions.URLMatchException( 'Could not generate next gallery page--index component was not an integer!' )
            
            query_dict[ page_index_name ] = page_index + self._gallery_index_delta
            
            query = ConvertQueryDictToText( query_dict )
            
        else:
            
            raise NotImplementedError( 'Did not understand the next gallery page rules!' )
        
        r = urlparse.ParseResult( scheme, netloc, path, params, query, fragment )
        
        return r.geturl()
    
    def GetSafeSummary( self ):
        
        return 'URL Class "' + self._name + '" - ' + ConvertURLIntoDomain( self.GetExampleURL() )
    
    def GetURLType( self ):
        
        return self._url_type
    
    def IsGalleryURL( self ):
        
        return self._url_type == HC.URL_TYPE_GALLERY
    
    def IsParsable( self ):
        
        return self._url_type in ( HC.URL_TYPE_POST, HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE )
    
    def IsPostURL( self ):
        
        return self._url_type == HC.URL_TYPE_POST
    
    def IsWatchableURL( self ):
        
        return self._url_type == HC.URL_TYPE_WATCHABLE
    
    def Matches( self, url ):
        
        try:
            
            self.Test( url )
            
            return True
            
        except HydrusExceptions.URLMatchException:
            
            return False
        
    
    def Normalise( self, url ):
        
        p = urlparse.urlparse( url )
        
        scheme = self._preferred_scheme
        params = ''
        fragment = ''
        
        if self.ClippingIsAppropriate():
            
            netloc = self._ClipNetLoc( p.netloc )
            path = self._ClipAndFleshOutPath( p.path )
            query = self._ClipAndFleshOutQuery( p.query )
            
        else:
            
            netloc = p.netloc
            path = self._ClipAndFleshOutPath( p.path, allow_clip = False )
            query = self._ClipAndFleshOutQuery( p.query, allow_clip = False )
        
        r = urlparse.ParseResult( scheme, netloc, path, params, query, fragment )
        
        return r.geturl()
    
    def RefersToOneFile( self ):
        
        is_a_direct_file_page = self._url_type == HC.URL_TYPE_FILE
        is_a_single_file_post_page = self._url_type == HC.URL_TYPE_POST and not self._can_produce_multiple_files
        
        return is_a_direct_file_page or is_a_single_file_post_page
    
    def RegenerateMatchKey( self ):
        
        self._url_match_key = HydrusData.GenerateKey()
    
    def SetExampleURL( self, example_url ):
        
        self._example_url = example_url
    
    def SetMatchKey( self, match_key ):
        
        self._url_match_key = match_key
    
    def ShouldAssociateWithFiles( self ):
        
        return self._should_be_associated_with_files
    
    def Test( self, url ):
        
        p = urlparse.urlparse( url )
        
        if self._match_subdomains:
            
            if p.netloc != self._netloc and not p.netloc.endswith( '.' + self._netloc ):
                raise HydrusExceptions.URLMatchException( p.netloc + ' (potentially excluding subdomains) did not match ' + self._netloc )
            
        else:
            
            if not DomainEqualsAnotherForgivingWWW( p.netloc, self._netloc ):
                raise HydrusExceptions.URLMatchException( p.netloc + ' did not match ' + self._netloc )
            
        
        url_path = p.path
        
        while url_path.startswith( '/' ):
            url_path = url_path[ 1 : ]
        
        url_path_components = url_path.split( '/' )
        
        for ( index, ( string_match, default ) ) in enumerate( self._path_components ):
            
            if len( url_path_components ) > index:
                
                url_path_component = url_path_components[ index ]
                
                try:
                    string_match.Test( url_path_component )
                except HydrusExceptions.StringMatchException as e:
                    raise HydrusExceptions.URLMatchException( HydrusData.ToUnicode( e ) )
                
            elif default is None:
                
                raise HydrusExceptions.URLMatchException( url_path + ' did not have enough of the required path components!' )
            
        
        url_parameters = ConvertQueryTextToDict( p.query )
        
        for ( key, ( string_match, default ) ) in self._parameters.items():
            
            if key not in url_parameters:
                
                if default is None:
                    raise HydrusExceptions.URLMatchException( key + ' not found in ' + p.query )
                else:
                    continue
                
            
            value = url_parameters[ key ]
            
            try:
                string_match.Test( value )
            except HydrusExceptions.StringMatchException as e:
                raise HydrusExceptions.URLMatchException( HydrusData.ToUnicode( e ) )
            
        
    
    def ToTuple( self ):
        
        return ( self._url_type, self._preferred_scheme, self._netloc, self._match_subdomains, self._keep_matched_subdomains, self._path_components, self._parameters, self._api_lookup_converter, self._can_produce_multiple_files, self._should_be_associated_with_files, self._example_url )
    
    def UsesAPIURL( self ):
        
        return self._api_lookup_converter.MakesChanges()
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_URL_MATCH ] = URLMatch
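
# A minimal usage sketch, not part of the original module: normalising a url
# through the default url class forces the preferred scheme, clips the query
# down to the recognised parameters, and fleshes out any missing parameters
# that have defaults. The 'utm_source' parameter here is a hypothetical example.
def _example_url_match_normalise_sketch():
    
    url_match = URLMatch( 'example post url' )
    
    # 'utm_source' is clipped away, 'page' is fleshed out from its default of '1', and the scheme becomes https,
    # so this should produce 'https://hostname.com/post/page.php?id=123456&page=1&s=view'
    return url_match.Normalise( 'http://hostname.com/post/page.php?id=123456&s=view&utm_source=feed' )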