# hydrus/include/ClientNetworkingDomain.py
import ClientConstants as CC
import ClientParsing
import ClientThreading
import collections
import HydrusConstants as HC
import HydrusGlobals as HG
import HydrusData
import HydrusExceptions
import HydrusSerialisable
import threading
import time
import urlparse
def ConvertDomainIntoAllApplicableDomains( domain ):
    
    # walk up the domain hierarchy, collecting every domain worth tracking
    # e.g. maps.google.com -> [ 'maps.google.com', 'google.com' ]
    
    applicable_domains = []
    
    while '.' in domain:
        
        # let's discard www.blah.com so we don't end up tracking it separately to blah.com--there's not much point!
        is_www_style = domain.startswith( 'www' ) and domain.count( '.' ) > 1
        
        if not is_www_style:
            
            applicable_domains.append( domain )
            
        # strip off the leftmost subdomain, i.e. maps.google.com -> google.com
        domain = domain.split( '.', 1 )[1]
        
    return applicable_domains
def ConvertURLIntoDomain( url ):
    
    # parse the url and pull out its network location (the host), coerced to a byte string
    
    parse_result = urlparse.urlparse( url )
    
    return HydrusData.ToByteString( parse_result.netloc )
# per-header approval states, used by NetworkDomainManager.IsValid and the
# validation process below; headers start VALID_UNKNOWN until the user decides
VALID_DENIED = 0
VALID_APPROVED = 1
VALID_UNKNOWN = 2
# this should do network_contexts->user-agent as well, with some kind of approval system in place
# approval needs a new queue in the network engine. this will eventually test downloader validity and so on. failable at that stage
# user-agent info should be exportable/importable on the ui as well
# eventually extend this to do urlmatch->downloader_key, I think.
# hence we'll be able to do some kind of dnd_url->new thread watcher page
# hide urls on media viewer based on domain
# decide whether we want to add this to the dirtyobjects loop, and in which case, if anything is appropriate to store in the db separately
# hence making this a serialisableobject itself.
class NetworkDomainManager( HydrusSerialisable.SerialisableBase ):
    
    # Tracks URLMatch objects per domain and per-network-context custom headers.
    
    # BUG FIX: this was SERIALISABLE_TYPE_NETWORK_BANDWIDTH_MANAGER, which clashes with
    # the bandwidth manager; the registration below the class uses
    # SERIALISABLE_TYPE_NETWORK_DOMAIN_MANAGER, so the class constant must match or a
    # serialised dump of this object would round-trip as the wrong type.
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_NETWORK_DOMAIN_MANAGER
    SERIALISABLE_VERSION = 1
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        # set by the owning network engine after construction
        self.engine = None
        
        self._url_matches = HydrusSerialisable.SerialisableList()
        
        # network_context -> list of ( key, value, approved, reason ) header tuples
        self._network_contexts_to_custom_headers = {}
        
        # domain -> list of url matches whose example url lives on that domain
        self._domains_to_url_matches = collections.defaultdict( list )
        
        self._dirty = False
        
        self._lock = threading.Lock()
        
        self._RecalcCache()
        
    
    def _GetURLMatch( self, url ):
        
        # returns the first url match that accepts this url, or None
        # caller should hold the lock
        
        domain = ConvertURLIntoDomain( url )
        
        if domain in self._domains_to_url_matches:
            
            url_matches = self._domains_to_url_matches[ domain ]
            
            # it would be nice to somehow sort these based on descending complexity
            # maybe by length of example url
            # in this way, url matches can have overlapping domains
            # e.g. 'post url' vs 'post url, manga subpage'
            
            for url_match in url_matches:
                
                ( result_bool, result_reason ) = url_match.Test( url )
                
                if result_bool:
                    
                    return url_match
                    
                
            
        
        return None
        
    
    def _RecalcCache( self ):
        
        # rebuild the domain -> url match lookup from the authoritative list
        
        self._domains_to_url_matches = collections.defaultdict( list )
        
        for url_match in self._url_matches:
            
            domain = url_match.GetDomain()
            
            self._domains_to_url_matches[ domain ].append( url_match )
            
        
    
    def _SetDirty( self ):
        
        self._dirty = True
        
    
    def CanValidateInPopup( self, network_contexts ):
        
        # we can always do this for headers
        
        return True
        
    
    def GenerateValidationProcess( self, network_contexts ):
        
        # generate a process that will, when threadcalled maybe with .Start() , ask the user, one after another, all the key-value pairs
        # Should (network context) apply "(key)" header "(value)"?
        # Reason given is: "You need this to make it work lol."
        # once all the yes/nos are set, update db, reinitialise domain manager, set IsDone to true.
        
        # TODO: not yet implemented
        pass
        
    
    def GetCustomHeaders( self, network_contexts ):
        
        # returns a dict of header key -> value collected across the given contexts
        # NOTE(review): collection is not yet implemented--this currently always returns {}
        
        keys_to_values = {}
        
        with self._lock:
            
            pass
            
            # good order is global = least powerful, which I _think_ is how these come.
            # e.g. a site User-Agent should overwrite a global default
            
        
        return keys_to_values
        
    
    def GetDownloader( self, url ):
        
        with self._lock:
            
            # this might be better as getdownloaderkey, but we'll see how it shakes out
            # might also be worth being a getifhasdownloader
            
            # match the url to a url_match, then lookup that in a 'this downloader can handle this url_match type' dict that we'll manage
            
            # TODO: not yet implemented
            pass
            
        
    
    def IsValid( self, network_contexts ):
        
        # a query is invalid while any applicable custom header still awaits a user decision
        # for now, let's say that denied headers are simply not added, not that they invalidate a query
        
        for network_context in network_contexts:
            
            if network_context in self._network_contexts_to_custom_headers:
                
                custom_headers = self._network_contexts_to_custom_headers[ network_context ]
                
                for ( key, value, approved, reason ) in custom_headers:
                    
                    if approved == VALID_UNKNOWN:
                        
                        return False
                        
                    
                
            
        
        return True
        
    
    def NormaliseURL( self, url ):
        
        # call this before an entry into a seed cache or the db
        # use it in the dialog to review mass db-level changes
        
        with self._lock:
            
            url_match = self._GetURLMatch( url )
            
            if url_match is None:
                
                return url
                
            
            normalised_url = url_match.Normalise( url )
            
            return normalised_url
            
        
    
    def SetClean( self ):
        
        with self._lock:
            
            self._dirty = False
            
        
    
    def SetHeaderValidation( self, network_context, key, approved ):
        
        # record the user's approve/deny decision for the given header key
        
        # BUG FIX: this previously fetched the header list and did nothing with it; it
        # also indexed a plain dict, which would KeyError on an unknown context.
        
        with self._lock:
            
            if network_context in self._network_contexts_to_custom_headers:
                
                custom_headers = self._network_contexts_to_custom_headers[ network_context ]
                
                new_custom_headers = []
                
                for ( existing_key, value, existing_approved, reason ) in custom_headers:
                    
                    if existing_key == key:
                        
                        existing_approved = approved
                        
                    
                    new_custom_headers.append( ( existing_key, value, existing_approved, reason ) )
                    
                
                self._network_contexts_to_custom_headers[ network_context ] = new_custom_headers
                
                self._SetDirty()
                
            
        
    
# register the manager with the serialisable system so dumps can be loaded back into this class
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_NETWORK_DOMAIN_MANAGER ] = NetworkDomainManager
class DomainValidationProcess( object ):
    
    # Asks the user, one after another, to approve or deny each pending custom header
    # and reports each answer back to the domain manager.
    
    def __init__( self, domain_manager, header_tuples ):
        
        # header_tuples: iterable of ( network_context, key, value, approval_reason )
        
        self._domain_manager = domain_manager
        self._header_tuples = header_tuples
        
        self._is_done = False
        
    
    def IsDone( self ):
        
        return self._is_done
        
    
    def Start( self ):
        
        try:
            
            for ( network_context, key, value, approval_reason ) in self._header_tuples:
                
                job_key = ClientThreading.JobKey()
                
                # generate question
                
                question = 'intro text ' + approval_reason
                
                job_key.SetVariable( 'popup_yes_no_question', question )
                
                # pub it
                
                # BUG FIX: the answer was previously fetched only once, before the wait
                # loop, so a later user response was never seen and the loop spun until
                # shutdown. now we re-poll the job key every iteration.
                result = job_key.GetIfHasVariable( 'popup_yes_no_answer' )
                
                while result is None:
                    
                    if HG.view_shutdown:
                        
                        return
                        
                    
                    time.sleep( 0.25 )
                    
                    result = job_key.GetIfHasVariable( 'popup_yes_no_answer' )
                    
                
                if result:
                    
                    approved = VALID_APPROVED
                    
                else:
                    
                    approved = VALID_DENIED
                    
                
                self._domain_manager.SetHeaderValidation( network_context, key, approved )
                
            
        finally:
            
            # always mark done, even on shutdown or error, so callers do not wait forever
            self._is_done = True
            
        
    
# make this serialisable--maybe with name as the name of a named serialisable
# __hash__ for name? not sure
# maybe all serialisable should return __hash__ of ( type, name ) if they don't already
# that might lead to problems elsewhere, so careful
class URLMatch( HydrusSerialisable.SerialisableBaseNamed ):
    
    # Describes the shape of one kind of url (scheme, host, path components, query
    # parameters) so urls can be recognised ( Test ) and canonicalised ( Normalise ).
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_MATCH
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name, preferred_scheme = 'https', netloc = 'hostname.com', subdomain_is_important = False, path_components = None, parameters = None, example_url = 'https://hostname.com/post/page.php?id=123456&s=view' ):
        
        if path_components is None:
            
            path_components = HydrusSerialisable.SerialisableList()
            
            path_components.append( ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FIXED, match_value = 'post', example_string = 'post' ) )
            path_components.append( ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FIXED, match_value = 'page.php', example_string = 'page.php' ) )
            
        
        if parameters is None:
            
            parameters = HydrusSerialisable.SerialisableDictionary()
            
            parameters[ 's' ] = ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' )
            parameters[ 'id' ] = ClientParsing.StringMatch( match_type = ClientParsing.STRING_MATCH_FLEXIBLE, match_value = ClientParsing.NUMERIC, example_string = '123456' )
            
        
        # an edit dialog panel for this that has example url and testing of current values
        # a parent panel or something that lists all current urls in the db that match and how they will be clipped, is this ok? kind of thing.
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._preferred_scheme = preferred_scheme
        self._netloc = netloc
        self._subdomain_is_important = subdomain_is_important
        self._path_components = path_components
        self._parameters = parameters
        
        self._example_url = example_url
        
    
    def _ClipNetLoc( self, netloc ):
        
        if self._subdomain_is_important:
            
            # for domains like artistname.website.com, where removing the subdomain may break the url, we leave it alone
            
            pass
            
        else:
            
            # for domains like mediaserver4.website.com, where multiple subdomains serve the same content as the larger site
            # if the main site doesn't deliver the same content as the subdomain, then subdomain_is_important
            
            netloc = self._netloc
            
        
        return netloc
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_path_components = self._path_components.GetSerialisableTuple()
        serialisable_parameters = self._parameters.GetSerialisableTuple()
        
        return ( self._preferred_scheme, self._netloc, self._subdomain_is_important, serialisable_path_components, serialisable_parameters, self._example_url )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._preferred_scheme, self._netloc, self._subdomain_is_important, serialisable_path_components, serialisable_parameters, self._example_url ) = serialisable_info
        
        self._path_components = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_path_components )
        self._parameters = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_parameters )
        
    
    def _ClipPath( self, path ):
        
        # /post/show/1326143/akunim-anthro-armband-armwear-clothed-clothing-fem
        
        while path.startswith( '/' ):
            
            path = path[ 1 : ]
            
        
        # post/show/1326143/akunim-anthro-armband-armwear-clothed-clothing-fem
        
        path_components = path.split( '/' )
        
        path = '/'.join( path_components[ : len( self._path_components ) ] )
        
        # post/show/1326143
        
        if len( path ) > 0:
            
            path = '/' + path
            
        
        # /post/show/1326143
        
        return path
        
    
    def _ClipQuery( self, query ):
        
        # keep only the recognised parameters, sorted for a canonical ordering
        
        valid_parameters = []
        
        for ( key, value ) in urlparse.parse_qsl( query ):
            
            if key in self._parameters:
                
                valid_parameters.append( ( key, value ) )
                
            
        
        valid_parameters.sort()
        
        query = '&'.join( ( key + '=' + value for ( key, value ) in valid_parameters ) )
        
        return query
        
    
    def GetDomain( self ):
        
        return ConvertURLIntoDomain( self._example_url )
        
    
    def Normalise( self, url ):
        
        # canonicalise the url: preferred scheme, clipped netloc/path/query, no fragment
        
        p = urlparse.urlparse( url )
        
        scheme = self._preferred_scheme
        netloc = self._ClipNetLoc( p.netloc )
        path = self._ClipPath( p.path )
        params = ''
        query = self._ClipQuery( p.query )
        fragment = ''
        
        r = urlparse.ParseResult( scheme, netloc, path, params, query, fragment )
        
        return r.geturl()
        
    
    def Test( self, url ):
        
        # returns ( bool, reason ) -- whether this match accepts the url
        
        # split the url into parts according to urlparse
        
        p = urlparse.urlparse( url )
        
        # TODO: test p.netloc with self._netloc, taking subdomain_is_important into account
        
        url_path = p.path
        
        while url_path.startswith( '/' ):
            
            url_path = url_path[ 1 : ]
            
        
        # BUG FIX: this previously split p.path, which still carries the leading slash,
        # so the first component was always '' and every fixed path match failed. we now
        # split the stripped url_path computed above.
        url_path_components = url_path.split( '/' )
        
        if len( url_path_components ) < len( self._path_components ):
            
            return ( False, p.path + ' did not have ' + str( len( self._path_components ) ) + ' components' )
            
        
        for ( url_path_component, expected_path_component ) in zip( url_path_components, self._path_components ):
            
            ( bool_result, reason ) = expected_path_component.Test( url_path_component )
            
            if not bool_result:
                
                return ( bool_result, reason )
                
            
        
        url_parameters_list = urlparse.parse_qsl( p.query )
        
        if len( url_parameters_list ) < len( self._parameters ):
            
            return ( False, p.query + ' did not have ' + str( len( self._parameters ) ) + ' value pairs' )
            
        
        for ( key, url_value ) in url_parameters_list:
            
            if key not in self._parameters:
                
                return ( False, key + ' not found in ' + p.query )
                
            
            expected_value = self._parameters[ key ]
            
            ( bool_result, reason ) = expected_value.Test( url_value )
            
            if not bool_result:
                
                return ( bool_result, reason )
                
            
        
        return ( True, 'good' )
        
    
# register the class with the serialisable system so dumps can be loaded back into it.
# BUG FIX: this was registered under SERIALISABLE_TYPE_URLS_IMPORT, which does not match
# URLMatch.SERIALISABLE_TYPE ( SERIALISABLE_TYPE_URL_MATCH ), so a dumped URLMatch could
# not be resolved back to its class.
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_URL_MATCH ] = URLMatch