hydrus/hydrus/core/HydrusTags.py

import collections
import os
import re
import threading

from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusText

def CensorshipMatch( tag, censorships ):
    """
    Report whether any rule in censorships covers tag.
    
    Rule forms:
        ''                   - every unnamespaced tag
        ':'                  - every namespaced tag
        'series:'            - every tag in the 'series' namespace
        'series:evangelion'  - exactly that tag
        'table'              - any tag whose subtag is 'table', namespaced or not
    
    Returns True on the first rule that matches, False if none do.
    """
    
    for rule in censorships:
        
        if rule == '':
            
            hit = SplitTag( tag )[0] == ''
            
        elif rule == ':':
            
            hit = SplitTag( tag )[0] != ''
            
        elif ':' not in rule:
            
            # plain subtag rule, matches the namespaced version of the same too
            hit = SplitTag( tag )[1] == rule
            
        elif rule.endswith( ':' ):
            
            # whole-namespace rule, compare against the rule minus its trailing colon
            hit = SplitTag( tag )[0] == rule[:-1]
            
        else:
            
            # fully specified tag, exact match only
            hit = tag == rule
            
        
        if hit:
            
            return True
            
        
    
    return False
    
def CollapseMultipleSortedNumericTagsToMinMax( tags ):
    """
    Reduce an already-sorted run of numeric tags to just its first and last members.
    
    The caller wants to present something like 1, 2, 3, 4, 5 as 1-5.
    
    tags: a sized collection of tag strings, expected to be sorted already.
    
    Returns tags unchanged when it has two or fewer members or when any member is not numeric,
    otherwise a new two-item list [ first, last ].
    """
    
    if len( tags ) <= 2:
        
        return tags
        
    
    # ConvertTagToSortable returns a tuple for every input, so testing its result with isinstance( ..., tuple )
    # can never spot a non-numeric tag. 'Numeric' here means what the legacy sortable conversion meant:
    # the tag starts with a decimal digit. The slice keeps the empty string safe (it counts as non-numeric).
    
    if not all( tag[:1].isdecimal() for tag in tags ):
        
        return tags
        
    
    if not isinstance( tags, list ):
        
        # sets, tuples and the like need to be indexable for the first/last pick
        tags = list( tags )
        
    
    return [ tags[0], tags[-1] ]
    
def ConvertTagToSortable( tag ):
    """
    Build a sort key for tag that orders embedded numbers by value rather than by character.
    
    The lowercased tag is split into alternating text and ASCII-digit runs. Each digit run becomes
    ( '', number ) and each text run becomes ( text, 0 ), so every element is a ( str, int ) pair
    and keys from different tags always compare cleanly.
    
    Returns a tuple of those pairs.
    """
    
    # this copies the human sort in hydrusdata
    
    sortable_chunks = []
    
    for chunk in re.split( '([0-9]+)', tag.lower() ):
        
        if chunk.isdecimal():
            
            sortable_chunks.append( ( '', int( chunk ) ) )
            
        else:
            
            sortable_chunks.append( ( chunk, 0 ) )
            
        
    
    return tuple( sortable_chunks )

def FilterNamespaces( tags, namespaces ):
    """
    Pick out the tags that belong to any of the given namespaces.
    
    tags: iterable of tag strings.
    namespaces: iterable of namespace strings; None stands for the unnamespaced bucket, same as ''.
    
    Returns a set of the matching tags, in their full 'namespace:subtag' form.
    """
    
    # None and '' both mean 'no namespace'
    wanted_namespaces = { '' if namespace is None else namespace for namespace in namespaces }
    
    return { tag for tag in tags if SplitTag( tag )[0] in wanted_namespaces }
    
def SortNumericTags( tags ):
    """Return a new list of tags ordered by ConvertTagToSortable, leaving the input untouched."""
    
    return sorted( tags, key = ConvertTagToSortable )
    
def CheckTagNotEmpty( tag ):
    """
    Raise HydrusExceptions.TagSizeException if tag has an empty subtag.
    
    Only the part after the namespace is checked, so 'series:' fails just like '' does.
    """
    
    subtag = SplitTag( tag )[1]
    
    if subtag != '':
        
        return
        
    
    raise HydrusExceptions.TagSizeException( 'Received a zero-length tag!' )
    
def CleanTag( tag ):
    """
    Normalise a raw tag string.
    
    Steps, in order: keep the first 1024 characters, lowercase, rewrite a single leading colon to a
    double one, then run StripTextOfGumpf. A tag containing a colon is stripped as a whole, split with
    SplitTag, has both halves stripped again and is rejoined with CombineTag.
    
    The result may still have an empty subtag; CheckTagNotEmpty is the test for that.
    
    Raises Exception, with the offending tag and the underlying error in its message, if tag is None
    or any step fails. The original error is attached as the cause.
    """
    
    try:
        
        if tag is None:
            
            raise Exception()
            
        
        tag = tag[:1024]
        
        tag = tag.lower()
        
        tag = HydrusText.re_leading_single_colon.sub( '::', tag ) # Convert anything starting with one colon to start with two i.e. :D -> ::D
        
        if ':' in tag:
            
            tag = StripTextOfGumpf( tag ) # need to repeat here to catch 'system:' stuff
            
            ( namespace, subtag ) = SplitTag( tag )
            
            namespace = StripTextOfGumpf( namespace )
            subtag = StripTextOfGumpf( subtag )
            
            tag = CombineTag( namespace, subtag )
            
        else:
            
            tag = StripTextOfGumpf( tag )
            
        
    except Exception as e:
        
        text = 'Was unable to parse the tag: ' + str( tag )
        text += os.linesep * 2
        text += str( e )
        
        # chain explicitly so the traceback shows the real failure as the direct cause
        raise Exception( text ) from e
        
    
    return tag

def CleanTags( tags ):
    """
    Clean every tag in tags and return the survivors as a set.
    
    None entries and tags whose subtag is empty after cleaning are skipped silently.
    Any exception CleanTag raises is not caught here.
    """
    
    clean_tags = set()
    
    for raw_tag in tags:
        
        if raw_tag is None:
            
            continue
            
        
        cleaned_tag = CleanTag( raw_tag )
        
        try:
            
            CheckTagNotEmpty( cleaned_tag )
            
        except HydrusExceptions.TagSizeException:
            
            # nothing left of this one after cleaning
            continue
            
        else:
            
            clean_tags.add( cleaned_tag )
            
        
    
    return clean_tags
    
def CombineTag( namespace, subtag ):
    """
    Join a namespace and subtag back into one tag string.
    
    With a namespace the result is 'namespace:subtag'. Without one the subtag is returned as is,
    except that a subtag matching HydrusText.re_leading_single_colon gets one more colon prepended.
    """
    
    if namespace != '':
        
        return namespace + ':' + subtag
        
    
    starts_with_single_colon = HydrusText.re_leading_single_colon.search( subtag ) is not None
    
    if starts_with_single_colon:
        
        return ':' + subtag
        
    
    return subtag
    
def ConvertTagSliceToString( tag_slice ):
    """
    Give a human-readable label for a tag slice.
    
    '' and ':' are the catch-alls for unnamespaced and namespaced tags. A slice with exactly one colon,
    at the end, names a whole namespace. Anything else is a specific tag and is returned unchanged.
    """
    
    catch_all_labels = { '' : 'unnamespaced tags', ':' : 'namespaced tags' }
    
    if tag_slice in catch_all_labels:
        
        return catch_all_labels[ tag_slice ]
        
    
    is_namespace_slice = tag_slice.endswith( ':' ) and tag_slice.count( ':' ) == 1
    
    if is_namespace_slice:
        
        return '\'{}\' tags'.format( tag_slice[:-1] )
        
    
    return tag_slice
    
def IsUnnamespaced( tag ):
    """Return True when SplitTag finds no namespace on tag."""
    
    ( namespace, subtag ) = SplitTag( tag )
    
    return namespace == ''
    
def SplitTag( tag ):
    """
    Split tag into a ( namespace, subtag ) tuple at its first colon.
    
    A tag with no colon comes back as ( '', tag ). Later colons stay in the subtag.
    """
    
    if ':' not in tag:
        
        return ( '', tag )
        
    
    ( namespace, colon, subtag ) = tag.partition( ':' )
    
    return ( namespace, subtag )
    
NULL_CHARACTER = '\x00'

def StripTextOfGumpf( t ):
    """
    Tidy a piece of tag text.
    
    In order: remove HydrusText.re_newlines matches, replace HydrusText.re_multiple_spaces matches with
    one space, strip surrounding whitespace, remove a HydrusText.re_leading_space_or_garbage match,
    and finally delete any null characters.
    """
    
    newline_free = HydrusText.re_newlines.sub( '', t )
    
    single_spaced = HydrusText.re_multiple_spaces.sub( ' ', newline_free )
    
    trimmed = single_spaced.strip()
    
    degarbaged = HydrusText.re_leading_space_or_garbage.sub( '', trimmed )
    
    # replace leaves the text as it was when there is no null character in it
    return degarbaged.replace( NULL_CHARACTER, '' )
    
def TagOK( t ):
    """
    Return True if t cleans to a tag with a non-empty subtag, False if cleaning or the emptiness check fails.
    """
    
    try:
        
        CheckTagNotEmpty( CleanTag( t ) )
        
        return True
        
    except Exception:
        
        # a bad tag is an ordinary Exception; KeyboardInterrupt, SystemExit and friends must still propagate
        return False
    
class TagFilter( HydrusSerialisable.SerialisableBase ):
    """
    A serialisable set of whitelist/blacklist rules, keyed by 'tag slice'.
    
    A tag slice is one of:
        ''          - all unnamespaced tags
        ':'         - all namespaced tags
        'series:'   - every tag in one namespace (exactly one colon, at the end)
        other text  - one specific tag
    
    A tag is permitted unless a slice it falls under is blacklisted and no slice it falls under is
    whitelisted. The rules live in self._tag_slices_to_rules. The remaining attributes are a cache of that
    dict, rebuilt by _UpdateRuleCache, so that _TagOK can answer with a handful of set lookups.
    
    Public methods take self._lock. Methods with a leading underscore do not and expect the caller to hold it.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_TAG_FILTER
    SERIALISABLE_NAME = 'Tag Filter Rules'
    SERIALISABLE_VERSION = 1
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self._lock = threading.Lock()
        
        # source of truth: tag slice -> rule
        self._tag_slices_to_rules = {}
        
        # everything below is cache, see _UpdateRuleCache
        
        self._all_unnamespaced_whitelisted = False
        self._all_namespaced_whitelisted = False
        self._namespaces_whitelist = set()
        self._tags_whitelist = set()
        
        self._all_unnamespaced_blacklisted = False
        self._all_namespaced_blacklisted = False
        self._namespaces_blacklist = set()
        self._tags_blacklist = set()
        
        # whether any catch-all/namespace rule, or any specific-tag rule, exists at all
        self._namespaced_interesting = False
        self._tags_interesting = False
        
    
    def __eq__( self, other ):
        
        # two filters are equal when they hold the same rules; the cache is derived, so it is not compared
        
        if isinstance( other, TagFilter ):
            
            return self._tag_slices_to_rules == other._tag_slices_to_rules
            
        
        return NotImplemented
        
    
    def _IterateTagSlices( self, tag, apply_unnamespaced_rules_to_namespaced_tags ):
        """
        Yield every tag slice that tag falls under: the tag itself, optionally its bare subtag,
        then either 'namespace:' and ':' for a namespaced tag or '' for an unnamespaced one.
        """
        
        # this guy gets called a lot, so we are making it an iterator
        
        yield tag
        
        ( namespace, subtag ) = SplitTag( tag )
        
        if tag != subtag and apply_unnamespaced_rules_to_namespaced_tags:
            
            yield subtag
            
        
        if namespace != '':
            
            yield '{}:'.format( namespace )
            yield ':'
            
        else:
            
            yield ''
            
        
    
    def _GetSerialisableInfo( self ):
        
        # only the rules are stored; the cache is rebuilt on load
        return list( self._tag_slices_to_rules.items() )
        
    
    def _GetSortedBlacklistAndWhitelist( self ):
        """
        Return ( blacklist, whitelist ), two sorted lists of the tag slices carrying each rule.
        
        Slices whose rule is neither HC.FILTER_BLACKLIST nor HC.FILTER_WHITELIST appear in neither list.
        """
        
        blacklist = sorted( tag_slice for ( tag_slice, rule ) in self._tag_slices_to_rules.items() if rule == HC.FILTER_BLACKLIST )
        whitelist = sorted( tag_slice for ( tag_slice, rule ) in self._tag_slices_to_rules.items() if rule == HC.FILTER_WHITELIST )
        
        return ( blacklist, whitelist )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        self._tag_slices_to_rules = dict( serialisable_info )
        
        self._UpdateRuleCache()
        
    
    def _TagOK( self, tag, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """
        Decide whether tag passes the filter. The caller must hold self._lock.
        
        Any whitelist hit permits the tag immediately. Otherwise the tag is refused if any blacklist rule
        covers it, and permitted if no rule covers it at all.
        
        apply_unnamespaced_rules_to_namespaced_tags: when True, specific-tag rules are also tried against
        the bare subtag of a namespaced tag.
        """
        
        # since this is called a whole bunch and overhead piles up, the logic is splayed out to hardcoded tests
        # against the cache rather than iterating over the tag's slices
        
        blacklist_encountered = False
        
        if self._tags_interesting:
            
            if tag in self._tags_whitelist:
                
                return True
                
            
            if tag in self._tags_blacklist:
                
                blacklist_encountered = True
                
            
        
        if self._namespaced_interesting or apply_unnamespaced_rules_to_namespaced_tags:
            
            ( namespace, subtag ) = SplitTag( tag )
            
            if apply_unnamespaced_rules_to_namespaced_tags and self._tags_interesting and subtag != tag:
                
                if subtag in self._tags_whitelist:
                    
                    return True
                    
                
                if subtag in self._tags_blacklist:
                    
                    blacklist_encountered = True
                    
                
            
            if self._namespaced_interesting:
                
                if namespace == '':
                    
                    if self._all_unnamespaced_whitelisted:
                        
                        return True
                        
                    
                    if self._all_unnamespaced_blacklisted:
                        
                        blacklist_encountered = True
                        
                    
                else:
                    
                    if self._all_namespaced_whitelisted or namespace in self._namespaces_whitelist:
                        
                        return True
                        
                    
                    if self._all_namespaced_blacklisted or namespace in self._namespaces_blacklist:
                        
                        blacklist_encountered = True
                        
                    
                
            
        
        # rule against and no exceptions -> refused; no rules against or explicitly for -> permitted
        return not blacklist_encountered
        
    
    def _TagSlicesToString( self, tag_slices ):
        
        # comma-separated human-readable labels for the given slices
        return ', '.join( ConvertTagSliceToString( tag_slice ) for tag_slice in tag_slices )
        
    
    def _UpdateRuleCache( self ):
        """Rebuild every cache attribute from self._tag_slices_to_rules. The caller must hold self._lock."""
        
        self._all_unnamespaced_whitelisted = False
        self._all_namespaced_whitelisted = False
        self._namespaces_whitelist = set()
        self._tags_whitelist = set()
        
        self._all_unnamespaced_blacklisted = False
        self._all_namespaced_blacklisted = False
        self._namespaces_blacklist = set()
        self._tags_blacklist = set()
        
        self._namespaced_interesting = False
        self._tags_interesting = False
        
        # note that any rule that is not HC.FILTER_WHITELIST is cached as a blacklist rule
        
        for ( tag_slice, rule ) in self._tag_slices_to_rules.items():
            
            if tag_slice == '':
                
                if rule == HC.FILTER_WHITELIST:
                    
                    self._all_unnamespaced_whitelisted = True
                    
                else:
                    
                    self._all_unnamespaced_blacklisted = True
                    
                
                self._namespaced_interesting = True
                
            elif tag_slice == ':':
                
                if rule == HC.FILTER_WHITELIST:
                    
                    self._all_namespaced_whitelisted = True
                    
                else:
                    
                    self._all_namespaced_blacklisted = True
                    
                
                self._namespaced_interesting = True
                
            elif tag_slice.count( ':' ) == 1 and tag_slice.endswith( ':' ):
                
                # whole-namespace slice, stored without its trailing colon
                
                if rule == HC.FILTER_WHITELIST:
                    
                    self._namespaces_whitelist.add( tag_slice[:-1] )
                    
                else:
                    
                    self._namespaces_blacklist.add( tag_slice[:-1] )
                    
                
                self._namespaced_interesting = True
                
            else:
                
                if rule == HC.FILTER_WHITELIST:
                    
                    self._tags_whitelist.add( tag_slice )
                    
                else:
                    
                    self._tags_blacklist.add( tag_slice )
                    
                
                self._tags_interesting = True
                
            
        
    
    def AllowsEverything( self ):
        """Return True when no slice carries HC.FILTER_BLACKLIST."""
        
        with self._lock:
            
            return not any( rule == HC.FILTER_BLACKLIST for rule in self._tag_slices_to_rules.values() )
            
        
    
    def CleanRules( self ):
        """
        Run every rule's tag slice through CleanTag and rebuild the cache.
        
        '' and ':' are kept as they are. A namespace slice is cleaned by way of a throwaway example tag.
        Slices that CleanTag rejects are dropped. If two slices clean to the same text, the later one wins.
        """
        
        example_subtag = 'example'
        
        # held for the whole rebuild, like every other public method that touches the rules, so a concurrent
        # TagOK/Filter cannot see a half-built cache
        with self._lock:
            
            new_tag_slices_to_rules = {}
            
            for ( tag_slice, rule ) in self._tag_slices_to_rules.items():
                
                if tag_slice == '':
                    
                    pass
                    
                elif tag_slice == ':':
                    
                    pass
                    
                elif tag_slice.count( ':' ) == 1 and tag_slice.endswith( ':' ):
                    
                    # CleanTag works on whole tags, so clean 'namespace:example' and cut the example back off
                    
                    example_tag = tag_slice + example_subtag
                    
                    try:
                        
                        clean_example_tag = CleanTag( example_tag )
                        
                    except Exception:
                        
                        continue
                        
                    
                    tag_slice = clean_example_tag[ : -len( example_subtag ) ]
                    
                else:
                    
                    try:
                        
                        tag_slice = CleanTag( tag_slice )
                        
                    except Exception:
                        
                        continue
                        
                    
                
                # NOTE(review): if cleaning can reduce a slice to '' or ':' it is stored here as a catch-all rule - confirm whether that can happen
                new_tag_slices_to_rules[ tag_slice ] = rule
                
            
            self._tag_slices_to_rules = new_tag_slices_to_rules
            
            self._UpdateRuleCache()
            
        
    
    def Filter( self, tags, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """Return the set of tags that pass the filter."""
        
        with self._lock:
            
            return { tag for tag in tags if self._TagOK( tag, apply_unnamespaced_rules_to_namespaced_tags = apply_unnamespaced_rules_to_namespaced_tags ) }
            
        
    
    def GetTagSlicesToRules( self ):
        """Return a copy of the tag slice -> rule dict."""
        
        with self._lock:
            
            return dict( self._tag_slices_to_rules )
            
        
    
    def SetRule( self, tag_slice, rule ):
        """Set or overwrite the rule for tag_slice and rebuild the cache."""
        
        with self._lock:
            
            self._tag_slices_to_rules[ tag_slice ] = rule
            
            self._UpdateRuleCache()
            
        
    
    def TagOK( self, tag, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """Return True if tag passes the filter. See _TagOK for the rules."""
        
        with self._lock:
            
            return self._TagOK( tag, apply_unnamespaced_rules_to_namespaced_tags = apply_unnamespaced_rules_to_namespaced_tags )
            
        
    
    def ToBlacklistString( self ):
        """Describe the filter from a blacklist point of view, e.g. 'blacklisting on x, y except z'."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'no blacklist set'
                
            
            if set( blacklist ) == { '', ':' }:
                
                text = 'blacklisting on any tags'
                
            else:
                
                text = 'blacklisting on ' + self._TagSlicesToString( blacklist )
                
            
            if len( whitelist ) > 0:
                
                text += ' except ' + self._TagSlicesToString( whitelist )
                
            
            return text
            
        
    
    def ToCensoredString( self ):
        """Describe the filter in terms of what is allowed through, e.g. 'all but x allowed except y'."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'all tags allowed'
                
            
            if set( blacklist ) == { '', ':' }:
                
                text = 'no tags allowed'
                
            else:
                
                text = 'all but ' + self._TagSlicesToString( blacklist ) + ' allowed'
                
            
            if len( whitelist ) > 0:
                
                text += ' except ' + self._TagSlicesToString( whitelist )
                
            
            return text
            
        
    
    def ToPermittedString( self ):
        """Describe which tags the filter permits, e.g. 'all tags', 'only x' or 'all tags except y'."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'all tags'
                
            
            blacklisted_slices = set( blacklist )
            
            if blacklisted_slices == { '', ':' }:
                
                # everything is blocked by default, so only the whitelist gets through
                
                if len( whitelist ) == 0:
                    
                    text = 'no tags'
                    
                else:
                    
                    text = 'only ' + self._TagSlicesToString( whitelist )
                    
                
            elif blacklisted_slices == { '' }:
                
                text = 'all namespaced tags'
                
                if len( whitelist ) > 0:
                    
                    text += ' and ' + self._TagSlicesToString( whitelist )
                    
                
            elif blacklisted_slices == { ':' }:
                
                text = 'all unnamespaced tags'
                
                if len( whitelist ) > 0:
                    
                    text += ' and ' + self._TagSlicesToString( whitelist )
                    
                
            else:
                
                text = 'all tags except ' + self._TagSlicesToString( blacklist )
                
                if len( whitelist ) > 0:
                    
                    text += ' (except ' + self._TagSlicesToString( whitelist ) + ')'
                    
                
            
            return text
            
        
    
# register the class so it can be looked up by its serialisable type
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_TAG_FILTER ] = TagFilter
Version 77 2013-07-17 20:56:13 +00:00			`import collections`
			`import os`
Version 151 2015-03-25 22:04:19 +00:00			`import re`
Version 434 2021-04-07 21:26:45 +00:00			`import threading`
Version 406 2020-07-29 20:52:44 +00:00
			`from hydrus.core import HydrusConstants as HC`
			`from hydrus.core import HydrusExceptions`
Version 434 2021-04-07 21:26:45 +00:00			`from hydrus.core import HydrusSerialisable`
Version 394 2020-04-22 21:00:35 +00:00			`from hydrus.core import HydrusText`
Version 250 2017-04-05 21:16:40 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`def CensorshipMatch( tag, censorships ):`
Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`for censorship in censorships:`
Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`if censorship == '': # '' - all non namespaced tags`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`if namespace == '':`
Version 200 2016-04-06 19:52:45 +00:00
			`return True`


			`elif censorship == ':': # ':' - all namespaced tags`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`if namespace != '':`
Version 200 2016-04-06 19:52:45 +00:00
			`return True`

Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`elif ':' in censorship:`
Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`if censorship.endswith( ':' ): # 'series:' - namespaced tags`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

Version 247 2017-03-15 20:13:04 +00:00			`if namespace == censorship[:-1]:`
Version 200 2016-04-06 19:52:45 +00:00
			`return True`


			`else: # 'series:evangelion' - exact match with namespace`

			`if tag == censorship:`

			`return True`



			`else:`

			`# 'table' - normal tag, or namespaced version of same`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`if subtag == censorship:`
Version 200 2016-04-06 19:52:45 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`return True`
Version 200 2016-04-06 19:52:45 +00:00
Version 107 2014-03-12 22:08:23 +00:00


Version 200 2016-04-06 19:52:45 +00:00			`return False`

Version 296 2018-02-28 22:30:36 +00:00			`def CollapseMultipleSortedNumericTagsToMinMax( tags ):`

			`if len( tags ) <= 2:`

			`return tags`

			`else:`

			`includes_non_numeric_tag = True in ( not isinstance( ConvertTagToSortable( tag ), tuple ) for tag in tags )`

			`if includes_non_numeric_tag:`

			`return tags`

			`else:`

			`# this list of tags is entirely numeric and may well be something like 1, 2, 3, 4, 5`
			`# the caller wants to present 1-5 instead, so lets cut out the first and last`

			`if not isinstance( tags, list ):`

			`tags = list( tags )`


			`return [ tags[0], tags[-1] ]`



Version 322 2018-09-12 21:36:26 +00:00			`def ConvertTagToSortable( tag ):`
Version 236 2016-12-14 21:19:07 +00:00
Version 324 2018-09-26 19:05:12 +00:00			`# this copies the human sort in hydrusdata`
Version 322 2018-09-12 21:36:26 +00:00
Version 336 2019-01-16 22:40:53 +00:00			`convert = lambda text: ( '', int( text ) ) if text.isdecimal() else ( text, 0 )`
Version 322 2018-09-12 21:36:26 +00:00
Version 335 2019-01-09 22:59:03 +00:00			`return tuple( [ convert( c ) for c in re.split( '([0-9]+)', tag.lower() ) ] )`
Version 322 2018-09-12 21:36:26 +00:00
			`# old method`

			`'''if len( t ) > 0 and t[0].isdecimal():`
Version 146 2015-02-03 20:40:21 +00:00
			`# We want to maintain that:`
			`# 0 < 0a < 0b < 1 ( lexicographic comparison )`
			`# -and-`
			`# 2 < 22 ( value comparison )`
			`# So, if the first bit can be turned into an int, split it into ( int, extra )`

			`int_component = ''`

			`i = 0`

			`for character in t:`

Version 284 2017-11-29 21:48:23 +00:00			`if character.isdecimal():`

			`int_component += character`

			`else:`

			`break`

Version 146 2015-02-03 20:40:21 +00:00
			`i += 1`


			`str_component = t[i:]`

Version 236 2016-12-14 21:19:07 +00:00			`number = int( int_component )`

			`return ( number, str_component )`

			`else:`

			`return t`
Version 322 2018-09-12 21:36:26 +00:00			`'''`
Version 146 2015-02-03 20:40:21 +00:00
Version 136 2014-11-12 23:33:13 +00:00			`def FilterNamespaces( tags, namespaces ):`

			`processed_tags = collections.defaultdict( set )`

			`for tag in tags:`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`processed_tags[ namespace ].add( tag )`
Version 136 2014-11-12 23:33:13 +00:00

			`result = set()`

			`for namespace in namespaces:`

Version 244 2017-02-08 22:27:00 +00:00			`if namespace == None:`

			`result.update( processed_tags[ '' ] )`

			`else:`

			`result.update( processed_tags[ namespace ] )`

Version 136 2014-11-12 23:33:13 +00:00

			`return result`

Version 203 2016-04-27 19:20:37 +00:00			`def SortNumericTags( tags ):`
Version 145 2015-01-21 22:49:58 +00:00
			`tags = list( tags )`

Version 146 2015-02-03 20:40:21 +00:00			`tags.sort( key = ConvertTagToSortable )`
Version 145 2015-01-21 22:49:58 +00:00
			`return tags`

Version 151 2015-03-25 22:04:19 +00:00			`def CheckTagNotEmpty( tag ):`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`
Version 151 2015-03-25 22:04:19 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`if subtag == '':`
Version 151 2015-03-25 22:04:19 +00:00
Version 399 2020-05-27 21:27:52 +00:00			`raise HydrusExceptions.TagSizeException( 'Received a zero-length tag!' )`
Version 151 2015-03-25 22:04:19 +00:00
Version 250 2017-04-05 21:16:40 +00:00
Version 151 2015-03-25 22:04:19 +00:00			`def CleanTag( tag ):`

Version 165 2015-07-15 20:28:26 +00:00			`try:`

Version 306 2018-05-09 20:23:00 +00:00			`if tag is None:`

			`raise Exception()`


Version 165 2015-07-15 20:28:26 +00:00			`tag = tag[:1024]`

			`tag = tag.lower()`

Version 307 2018-05-16 20:09:50 +00:00			`tag = HydrusText.re_leading_single_colon.sub( '::', tag ) # Convert anything starting with one colon to start with two i.e. :D -> ::D`

			`if ':' in tag:`
Version 245 2017-03-02 02:14:56 +00:00
Version 250 2017-04-05 21:16:40 +00:00			`tag = StripTextOfGumpf( tag ) # need to repeat here to catch 'system:' stuff`

Version 245 2017-03-02 02:14:56 +00:00			`( namespace, subtag ) = SplitTag( tag )`

Version 250 2017-04-05 21:16:40 +00:00			`namespace = StripTextOfGumpf( namespace )`
			`subtag = StripTextOfGumpf( subtag )`
Version 245 2017-03-02 02:14:56 +00:00
			`tag = CombineTag( namespace, subtag )`

			`else:`

Version 250 2017-04-05 21:16:40 +00:00			`tag = StripTextOfGumpf( tag )`
Version 165 2015-07-15 20:28:26 +00:00
Version 185 2015-12-09 23:16:41 +00:00
Version 165 2015-07-15 20:28:26 +00:00			`except Exception as e:`

Version 335 2019-01-09 22:59:03 +00:00			`text = 'Was unable to parse the tag: ' + str( tag )`
Version 165 2015-07-15 20:28:26 +00:00			`text += os.linesep * 2`
Version 335 2019-01-09 22:59:03 +00:00			`text += str( e )`
Version 151 2015-03-25 22:04:19 +00:00
Version 165 2015-07-15 20:28:26 +00:00			`raise Exception( text )`
Version 151 2015-03-25 22:04:19 +00:00

			`return tag`

			`def CleanTags( tags ):`

			`clean_tags = set()`

			`for tag in tags:`

Version 306 2018-05-09 20:23:00 +00:00			`if tag is None:`

			`continue`


Version 151 2015-03-25 22:04:19 +00:00			`tag = CleanTag( tag )`

Version 258 2017-05-31 21:50:53 +00:00			`try:`

			`CheckTagNotEmpty( tag )`

Version 399 2020-05-27 21:27:52 +00:00			`except HydrusExceptions.TagSizeException:`
Version 258 2017-05-31 21:50:53 +00:00
			`continue`

Version 151 2015-03-25 22:04:19 +00:00
			`clean_tags.add( tag )`


			`return clean_tags`

Version 244 2017-02-08 22:27:00 +00:00			`def CombineTag( namespace, subtag ):`
Version 185 2015-12-09 23:16:41 +00:00
			`if namespace == '':`

Version 307 2018-05-16 20:09:50 +00:00			`if HydrusText.re_leading_single_colon.search( subtag ) is not None:`
Version 185 2015-12-09 23:16:41 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`return ':' + subtag`
Version 185 2015-12-09 23:16:41 +00:00
			`else:`

Version 244 2017-02-08 22:27:00 +00:00			`return subtag`
Version 185 2015-12-09 23:16:41 +00:00

			`else:`

Version 244 2017-02-08 22:27:00 +00:00			`return namespace + ':' + subtag`
Version 185 2015-12-09 23:16:41 +00:00

def ConvertTagSliceToString( tag_slice ):
    """
    Return a human-readable description of a tag slice.
    
    - ''           -> 'unnamespaced tags'
    - ':'          -> 'namespaced tags'
    - 'namespace:' -> "'namespace' tags" (exactly one colon, at the end)
    - anything else is returned unchanged
    
    :param tag_slice: the tag slice string
    :return: descriptive string
    """
    
    if tag_slice == '':
        
        return 'unnamespaced tags'
        
    
    if tag_slice == ':':
        
        return 'namespaced tags'
        
    
    is_namespace_slice = tag_slice.endswith( ':' ) and tag_slice.count( ':' ) == 1
    
    if is_namespace_slice:
        
        return "'{}' tags".format( tag_slice[ : -1 ] )
        
    
    return tag_slice


def IsUnnamespaced( tag ):
    """
    Return True if SplitTag reports an empty namespace for this tag.
    
    :param tag: the tag string
    :return: bool
    """
    
    ( namespace, subtag ) = SplitTag( tag )
    
    return namespace == ''

def SplitTag( tag ):
    """
    Split a tag into a ( namespace, subtag ) tuple on its first colon.
    
    A tag with no colon yields ( '', tag ). Only the first colon separates, so
    any further colons stay in the subtag.
    
    :param tag: the tag string
    :return: 2-tuple of ( namespace, subtag )
    """
    
    if ':' not in tag:
        
        return ( '', tag )
        
    
    ( namespace, subtag ) = tag.split( ':', 1 )
    
    return ( namespace, subtag )
Version 244 2017-02-08 22:27:00 +00:00

# The NUL character; StripTextOfGumpf removes every occurrence of it from tag text.
NULL_CHARACTER = '\x00'

def StripTextOfGumpf( t ):
    """
    Normalise a piece of tag text by removing unwanted characters.
    
    Steps, in order:
    
    1. delete everything HydrusText.re_newlines matches
    2. replace everything HydrusText.re_multiple_spaces matches with one space
    3. strip leading and trailing whitespace
    4. delete everything HydrusText.re_leading_space_or_garbage matches
    5. delete every NULL_CHARACTER
    
    The exact patterns live in HydrusText; their names suggest newlines, runs of
    spaces and leading junk respectively -- confirm there.
    
    :param t: text to clean
    :return: the cleaned text
    """
    
    without_newlines = HydrusText.re_newlines.sub( '', t )
    
    single_spaced = HydrusText.re_multiple_spaces.sub( ' ', without_newlines )
    
    trimmed = single_spaced.strip()
    
    without_leading_garbage = HydrusText.re_leading_space_or_garbage.sub( '', trimmed )
    
    # replace is a no-op when there is nothing to replace, so no membership test is needed
    return without_leading_garbage.replace( NULL_CHARACTER, '' )

def TagOK( t ):
    """
    Return True if t cleans to a tag that passes CheckTagNotEmpty, else False.
    
    Any Exception raised while cleaning or checking is treated as 'not ok'.
    KeyboardInterrupt and SystemExit are deliberately not swallowed, so a
    shutdown or interrupt request arriving mid-check still propagates.
    
    :param t: candidate tag
    :return: bool
    """
    
    try:
        
        CheckTagNotEmpty( CleanTag( t ) )
        
    except Exception:
        
        return False
        
    
    return True


class TagFilter( HydrusSerialisable.SerialisableBase ):
    """
    A serialisable set of whitelist/blacklist rules that decides which tags are permitted.
    
    Rules are stored per 'tag slice'. A tag slice is one of:
    
    - ''           : all unnamespaced tags
    - ':'          : all namespaced tags
    - 'namespace:' : all tags in that namespace (exactly one colon, at the end)
    - anything else: that exact tag
    
    A tag is permitted if any slice that applies to it is whitelisted, or if no
    slice that applies to it is blacklisted.
    
    AllowsEverything, CleanRules, Filter, GetTagSlicesToRules, SetRule, TagOK and
    the To*String methods hold self._lock while they read or write the rules.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_TAG_FILTER
    SERIALISABLE_NAME = 'Tag Filter Rules'
    SERIALISABLE_VERSION = 1
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self._lock = threading.Lock()
        
        # the authoritative rules: tag slice -> rule (compared against HC.FILTER_WHITELIST / HC.FILTER_BLACKLIST)
        self._tag_slices_to_rules = {}
        
        # everything below is a lookup cache derived from the rules by _UpdateRuleCache
        
        self._all_unnamespaced_whitelisted = False
        self._all_namespaced_whitelisted = False
        self._namespaces_whitelist = set()
        self._tags_whitelist = set()
        
        self._all_unnamespaced_blacklisted = False
        self._all_namespaced_blacklisted = False
        self._namespaces_blacklist = set()
        self._tags_blacklist = set()
        
        # True when at least one '', ':' or 'namespace:' rule exists
        self._namespaced_interesting = False
        # True when at least one exact-tag rule exists
        self._tags_interesting = False
        
    
    def __eq__( self, other ):
        
        # Two filters are equal when they hold the same rules.
        # NOTE: defining __eq__ without __hash__ makes Python set __hash__ to None on this class.
        
        if isinstance( other, TagFilter ):
            
            return self._tag_slices_to_rules == other._tag_slices_to_rules
            
        
        return NotImplemented
        
    
    def _IterateTagSlices( self, tag, apply_unnamespaced_rules_to_namespaced_tags ):
        
        # Yields every tag slice that could hold a rule applying to this tag: the tag itself,
        # optionally its bare subtag, then either 'namespace:' and ':' or ''.
        # this guy gets called a lot, so we are making it an iterator
        
        yield tag
        
        ( namespace, subtag ) = SplitTag( tag )
        
        if tag != subtag and apply_unnamespaced_rules_to_namespaced_tags:
            
            yield subtag
            
        
        if namespace != '':
            
            yield '{}:'.format( namespace )
            yield ':'
            
        else:
            
            yield ''
            
        
    
    def _GetSerialisableInfo( self ):
        
        # the rules as a list of ( tag_slice, rule ) pairs
        return list( self._tag_slices_to_rules.items() )
        
    
    def _GetSortedBlacklistAndWhitelist( self ):
        
        # Returns ( blacklist, whitelist ): two sorted lists of the tag slices carrying each rule.
        # Caller is expected to hold self._lock.
        
        blacklist = sorted( tag_slice for ( tag_slice, rule ) in self._tag_slices_to_rules.items() if rule == HC.FILTER_BLACKLIST )
        whitelist = sorted( tag_slice for ( tag_slice, rule ) in self._tag_slices_to_rules.items() if rule == HC.FILTER_WHITELIST )
        
        return ( blacklist, whitelist )
        
    
    @staticmethod
    def _DescribeTagSlices( tag_slices ):
        
        # comma-separated human-readable description of the given tag slices
        return ', '.join( ConvertTagSliceToString( tag_slice ) for tag_slice in tag_slices )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        self._tag_slices_to_rules = dict( serialisable_info )
        
        self._UpdateRuleCache()
        
    
    def _TagOK( self, tag, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """
        Return True if the tag is permitted by the current rules. Does not take the lock.
        
        A whitelist hit on any applicable slice permits the tag immediately. Otherwise
        the tag is refused if any applicable slice is blacklisted, and permitted if
        no rule applies at all.
        
        :param tag: the tag to test
        :param apply_unnamespaced_rules_to_namespaced_tags: if True, exact-tag rules are also tested against the tag's bare subtag
        :return: bool
        """
        
        # since this is called a whole bunch and overhead piles up, the logic is splayed out to
        # hardcoded tests against the caches rather than iterating over _IterateTagSlices
        
        blacklist_encountered = False
        
        if self._tags_interesting:
            
            if tag in self._tags_whitelist:
                
                return True
                
            
            if tag in self._tags_blacklist:
                
                blacklist_encountered = True
                
            
        
        if self._namespaced_interesting or apply_unnamespaced_rules_to_namespaced_tags:
            
            ( namespace, subtag ) = SplitTag( tag )
            
            if apply_unnamespaced_rules_to_namespaced_tags and self._tags_interesting and subtag != tag:
                
                if subtag in self._tags_whitelist:
                    
                    return True
                    
                
                if subtag in self._tags_blacklist:
                    
                    blacklist_encountered = True
                    
                
            
            if self._namespaced_interesting:
                
                if namespace == '':
                    
                    if self._all_unnamespaced_whitelisted:
                        
                        return True
                        
                    
                    if self._all_unnamespaced_blacklisted:
                        
                        blacklist_encountered = True
                        
                    
                else:
                    
                    if self._all_namespaced_whitelisted or namespace in self._namespaces_whitelist:
                        
                        return True
                        
                    
                    if self._all_namespaced_blacklisted or namespace in self._namespaces_blacklist:
                        
                        blacklist_encountered = True
                        
                    
                
            
        
        # a rule against and no exceptions -> refused; no rules against or explicitly for -> permitted
        return not blacklist_encountered
        
    
    def _UpdateRuleCache( self ):
        
        # Rebuilds every lookup cache from self._tag_slices_to_rules. Does not take the lock.
        
        self._all_unnamespaced_whitelisted = False
        self._all_namespaced_whitelisted = False
        self._namespaces_whitelist = set()
        self._tags_whitelist = set()
        
        self._all_unnamespaced_blacklisted = False
        self._all_namespaced_blacklisted = False
        self._namespaces_blacklist = set()
        self._tags_blacklist = set()
        
        self._namespaced_interesting = False
        self._tags_interesting = False
        
        for ( tag_slice, rule ) in self._tag_slices_to_rules.items():
            
            # any rule that is not a whitelist is cached as a blacklist
            is_whitelist = rule == HC.FILTER_WHITELIST
            
            if tag_slice == '':
                
                if is_whitelist:
                    
                    self._all_unnamespaced_whitelisted = True
                    
                else:
                    
                    self._all_unnamespaced_blacklisted = True
                    
                
                self._namespaced_interesting = True
                
            elif tag_slice == ':':
                
                if is_whitelist:
                    
                    self._all_namespaced_whitelisted = True
                    
                else:
                    
                    self._all_namespaced_blacklisted = True
                    
                
                self._namespaced_interesting = True
                
            elif tag_slice.count( ':' ) == 1 and tag_slice.endswith( ':' ):
                
                namespace = tag_slice[:-1]
                
                if is_whitelist:
                    
                    self._namespaces_whitelist.add( namespace )
                    
                else:
                    
                    self._namespaces_blacklist.add( namespace )
                    
                
                self._namespaced_interesting = True
                
            else:
                
                if is_whitelist:
                    
                    self._tags_whitelist.add( tag_slice )
                    
                else:
                    
                    self._tags_blacklist.add( tag_slice )
                    
                
                self._tags_interesting = True
                
            
        
    
    def AllowsEverything( self ):
        """Return True if no rule is a blacklist rule."""
        
        with self._lock:
            
            return not any( rule == HC.FILTER_BLACKLIST for rule in self._tag_slices_to_rules.values() )
            
        
    
    def CleanRules( self ):
        """
        Re-key every rule by the cleaned form of its tag slice, then rebuild the caches.
        
        '' and ':' are kept as they are. A 'namespace:' slice is cleaned by appending
        a dummy subtag, running CleanTag, and cutting the dummy subtag off again. Any
        other slice is cleaned as a whole tag. A slice that CleanTag raises on is
        dropped. If two slices clean to the same text, the rule iterated last wins.
        """
        
        example_subtag = 'example'
        
        # held for the whole swap-and-rebuild so concurrent readers never see half-built caches
        with self._lock:
            
            new_tag_slices_to_rules = {}
            
            for ( tag_slice, rule ) in self._tag_slices_to_rules.items():
                
                if tag_slice in ( '', ':' ):
                    
                    # the two catch-all slices have nothing to clean
                    pass
                    
                elif tag_slice.count( ':' ) == 1 and tag_slice.endswith( ':' ):
                    
                    try:
                        
                        clean_example_tag = CleanTag( tag_slice + example_subtag )
                        
                    except Exception:
                        
                        continue
                        
                    
                    # cut the dummy subtag back off, leaving the cleaned 'namespace:'
                    tag_slice = clean_example_tag[ : - len( example_subtag ) ]
                    
                else:
                    
                    try:
                        
                        tag_slice = CleanTag( tag_slice )
                        
                    except Exception:
                        
                        continue
                        
                    
                
                new_tag_slices_to_rules[ tag_slice ] = rule
                
            
            self._tag_slices_to_rules = new_tag_slices_to_rules
            
            self._UpdateRuleCache()
            
        
    
    def Filter( self, tags, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """
        Return the set of the given tags that the rules permit.
        
        :param tags: iterable of tags
        :param apply_unnamespaced_rules_to_namespaced_tags: passed through to _TagOK
        :return: set of permitted tags
        """
        
        with self._lock:
            
            return { tag for tag in tags if self._TagOK( tag, apply_unnamespaced_rules_to_namespaced_tags = apply_unnamespaced_rules_to_namespaced_tags ) }
            
        
    
    def GetTagSlicesToRules( self ):
        """Return a shallow copy of the tag slice -> rule dict."""
        
        with self._lock:
            
            return dict( self._tag_slices_to_rules )
            
        
    
    def SetRule( self, tag_slice, rule ):
        """Set (or overwrite) the rule for one tag slice and rebuild the caches."""
        
        with self._lock:
            
            self._tag_slices_to_rules[ tag_slice ] = rule
            
            self._UpdateRuleCache()
            
        
    
    def TagOK( self, tag, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """
        Return True if the rules permit this tag. Locked wrapper around _TagOK.
        
        :param tag: the tag to test
        :param apply_unnamespaced_rules_to_namespaced_tags: passed through to _TagOK
        :return: bool
        """
        
        with self._lock:
            
            return self._TagOK( tag, apply_unnamespaced_rules_to_namespaced_tags = apply_unnamespaced_rules_to_namespaced_tags )
            
        
    
    def ToBlacklistString( self ):
        """Describe the rules as a blacklist, e.g. 'blacklisting on x, y except z'."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'no blacklist set'
                
            
            if set( blacklist ) == { '', ':' }:
                
                text = 'blacklisting on any tags'
                
            else:
                
                text = 'blacklisting on ' + self._DescribeTagSlices( blacklist )
                
            
            if len( whitelist ) > 0:
                
                text += ' except ' + self._DescribeTagSlices( whitelist )
                
            
            return text
            
        
    
    def ToCensoredString( self ):
        """Describe the rules in terms of what is allowed, e.g. 'all but x allowed except y'."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'all tags allowed'
                
            
            if set( blacklist ) == { '', ':' }:
                
                text = 'no tags allowed'
                
            else:
                
                text = 'all but ' + self._DescribeTagSlices( blacklist ) + ' allowed'
                
            
            if len( whitelist ) > 0:
                
                text += ' except ' + self._DescribeTagSlices( whitelist )
                
            
            return text
            
        
    
    def ToPermittedString( self ):
        """Describe which tags get through, e.g. 'all tags', 'only x', 'all namespaced tags and y'."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'all tags'
                
            
            blacklist_set = set( blacklist )
            
            if blacklist_set == { '', ':' }:
                
                # everything is blocked, so only the whitelist, if any, gets through
                
                if len( whitelist ) == 0:
                    
                    text = 'no tags'
                    
                else:
                    
                    text = 'only ' + self._DescribeTagSlices( whitelist )
                    
                
            elif blacklist_set == { '' }:
                
                text = 'all namespaced tags'
                
                if len( whitelist ) > 0:
                    
                    text += ' and ' + self._DescribeTagSlices( whitelist )
                    
                
            elif blacklist_set == { ':' }:
                
                text = 'all unnamespaced tags'
                
                if len( whitelist ) > 0:
                    
                    text += ' and ' + self._DescribeTagSlices( whitelist )
                    
                
            else:
                
                text = 'all tags except ' + self._DescribeTagSlices( blacklist )
                
                if len( whitelist ) > 0:
                    
                    text += ' (except ' + self._DescribeTagSlices( whitelist ) + ')'
                    
                
            
            return text



# Register TagFilter under its serialisable type id in HydrusSerialisable's type -> class map
# (presumably so stored objects of this type can be turned back into TagFilter instances -- confirm in HydrusSerialisable).
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_TAG_FILTER ] = TagFilter