866 lines
23 KiB
Python
866 lines
23 KiB
Python
import collections
|
|
import os
|
|
import re
|
|
import threading
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
from hydrus.core import HydrusExceptions
|
|
from hydrus.core import HydrusSerialisable
|
|
from hydrus.core import HydrusText
|
|
|
|
def CensorshipMatch( tag, censorships ):
    """Return True if tag is covered by at least one of the given censorship rules.
    
    Recognised rule forms:
    
        ''                   every unnamespaced tag
        ':'                  every namespaced tag
        'series:'            every tag in the 'series' namespace
        'series:evangelion'  exactly that namespaced tag
        'table'              that subtag, whether or not it carries a namespace
    
    Returns False when no rule matches (including when censorships is empty).
    """
    
    def rule_covers_tag( rule ):
        
        if ':' in rule and not rule.endswith( ':' ):
            
            # a full 'namespace:subtag' rule only ever matches the identical tag, no split needed
            return tag == rule
            
        
        ( namespace, subtag ) = SplitTag( tag )
        
        if rule == '':
            
            return namespace == ''
            
        
        if rule == ':':
            
            return namespace != ''
            
        
        if ':' in rule:
            
            # 'series:' style rule, compare against the rule minus its trailing colon
            return namespace == rule[:-1]
            
        
        # bare rule such as 'table': matches 'table' and any 'namespace:table'
        return subtag == rule
        
    
    # any() stops at the first rule that matches, just like an early return
    return any( rule_covers_tag( rule ) for rule in censorships )
|
|
|
|
def CollapseMultipleSortedNumericTagsToMinMax( tags ):
    """Collapse an already-sorted run of numeric tags to just its first and last entries.
    
    The caller wants to present something like 1, 2, 3, 4, 5 as '1-5'.
    
    :param tags: a sized, ordered collection of tag strings, already sorted by the caller.
    :return: tags unchanged if there are two or fewer of them or if any of them is
             non-numeric, otherwise a new two-item list [ first, last ].
    
    A tag counts as numeric here when it starts with a decimal digit. This cannot be
    decided with an isinstance check on the result of ConvertTagToSortable, because that
    function returns a tuple for every tag, numeric or not, which would make every list
    of three or more tags collapse.
    """
    
    if len( tags ) <= 2:
        
        return tags
        
    
    # tag[:1] rather than tag[0] so an empty string is simply 'not numeric' instead of an IndexError
    includes_non_numeric_tag = any( not tag[:1].isdecimal() for tag in tags )
    
    if includes_non_numeric_tag:
        
        return tags
        
    
    # this list of tags is entirely numeric, so cut out everything but the first and last
    
    if not isinstance( tags, list ):
        
        tags = list( tags )
        
    
    return [ tags[0], tags[-1] ]
|
|
|
|
|
|
|
|
def ConvertTagToSortable( tag ):
    """Build a sort key that orders tags the way a human expects ('2' before '10').
    
    The lowercased tag is split into alternating non-digit and ASCII-digit chunks.
    A chunk of digits becomes ( '', int ), any other chunk (including the empty chunks
    the split leaves at the edges) becomes ( text, 0 ), so every element of the key has
    the same ( str, int ) shape and keys always compare cleanly with each other.
    
    :param tag: the tag string to convert.
    :return: a tuple of ( str, int ) pairs. It is always a tuple, even for tags with no digits.
    """
    
    # this copies the human sort in hydrusdata
    
    def convert( text ):
        
        if text.isdecimal():
            
            return ( '', int( text ) )
            
        
        return ( text, 0 )
        
    
    # the capturing group keeps the digit runs in the split output
    chunks = re.split( r'([0-9]+)', tag.lower() )
    
    return tuple( convert( chunk ) for chunk in chunks )
|
|
|
|
def FilterNamespaces( tags, namespaces ):
    """Return the subset of tags whose namespace is one of the requested namespaces.
    
    :param tags: iterable of tag strings.
    :param namespaces: iterable of namespace strings. None (or '') selects unnamespaced tags.
    :return: a set of the matching tags, each in its original 'namespace:subtag' form.
    """
    
    namespaces_to_tags = collections.defaultdict( set )
    
    for tag in tags:
        
        ( namespace, subtag ) = SplitTag( tag )
        
        namespaces_to_tags[ namespace ].add( tag )
        
    
    result = set()
    
    for namespace in namespaces:
        
        # SplitTag reports unnamespaced tags under '', which callers spell as None
        wanted_namespace = '' if namespace is None else namespace
        
        # .get so that asking for an absent namespace does not insert an empty set
        result.update( namespaces_to_tags.get( wanted_namespace, () ) )
        
    
    return result
|
|
|
|
def SortNumericTags( tags ):
    """Return a new list of the given tags in human order, using ConvertTagToSortable as the sort key.
    
    The input iterable is not modified.
    """
    
    return sorted( tags, key = ConvertTagToSortable )
|
|
|
|
def CheckTagNotEmpty( tag ):
    """Raise HydrusExceptions.TagSizeException if the subtag part of tag is the empty string.
    
    Returns None when the tag has a non-empty subtag.
    """
    
    # only the subtag matters here, 'series:' is just as empty as ''
    subtag = SplitTag( tag )[1]
    
    if subtag == '':
        
        raise HydrusExceptions.TagSizeException( 'Received a zero-length tag!' )
|
|
|
|
|
|
def CleanTag( tag ):
    """Normalise a raw tag string and return the cleaned tag.
    
    The tag is cut to at most 1024 characters, lowercased, a single leading colon is
    doubled, and the text is passed through StripTextOfGumpf. For a tag containing a colon
    the namespace and subtag are also stripped separately and recombined with CombineTag.
    
    The result may have an empty subtag. Use CheckTagNotEmpty to reject those.
    
    :param tag: the raw tag string.
    :return: the cleaned tag string.
    :raises Exception: if tag is None or any step fails. The message includes the offending
                       tag and the underlying error, which is also chained as __cause__.
    """
    
    try:
        
        if tag is None:
            
            raise Exception()
            
        
        tag = tag[:1024]
        
        tag = tag.lower()
        
        tag = HydrusText.re_leading_single_colon.sub( '::', tag ) # Convert anything starting with one colon to start with two i.e. :D -> ::D
        
        if ':' in tag:
            
            tag = StripTextOfGumpf( tag ) # need to repeat here to catch 'system:' stuff
            
            ( namespace, subtag ) = SplitTag( tag )
            
            namespace = StripTextOfGumpf( namespace )
            subtag = StripTextOfGumpf( subtag )
            
            tag = CombineTag( namespace, subtag )
            
        else:
            
            tag = StripTextOfGumpf( tag )
            
        
    except Exception as e:
        
        text = 'Was unable to parse the tag: ' + str( tag )
        text += os.linesep * 2
        text += str( e )
        
        # chain the original error so its traceback is not lost behind the friendlier message
        raise Exception( text ) from e
        
    
    return tag
|
|
|
|
def CleanTags( tags ):
    """Clean every tag in an iterable and return the usable results as a set.
    
    None entries are skipped, as are tags that CheckTagNotEmpty rejects after cleaning.
    Any exception CleanTag raises for an entry is not caught here.
    """
    
    clean_tags = set()
    
    for raw_tag in tags:
        
        if raw_tag is None:
            
            continue
            
        
        cleaned = CleanTag( raw_tag )
        
        try:
            
            CheckTagNotEmpty( cleaned )
            
        except HydrusExceptions.TagSizeException:
            
            # nothing left after cleaning, drop it quietly
            continue
            
        else:
            
            clean_tags.add( cleaned )
            
        
    
    return clean_tags
|
|
|
|
def CombineTag( namespace, subtag ):
    """Join a namespace and subtag back into a single tag string.
    
    With a non-empty namespace the result is 'namespace:subtag'. With an empty namespace
    the subtag is returned alone, except that a subtag matching
    HydrusText.re_leading_single_colon gets one extra colon prepended.
    """
    
    if namespace != '':
        
        return namespace + ':' + subtag
        
    
    if HydrusText.re_leading_single_colon.search( subtag ) is None:
        
        return subtag
        
    
    return ':' + subtag
|
|
|
|
|
|
def ConvertTagSliceToString( tag_slice ):
    """Return a human-readable description of a tag slice.
    
        ''         ->  'unnamespaced tags'
        ':'        ->  'namespaced tags'
        'series:'  ->  "'series' tags"
    
    Anything else is returned unchanged.
    """
    
    if tag_slice == '':
        
        return 'unnamespaced tags'
        
    
    if tag_slice == ':':
        
        return 'namespaced tags'
        
    
    # a namespace slice has exactly one colon and it is the final character
    is_namespace_slice = tag_slice.count( ':' ) == 1 and tag_slice.endswith( ':' )
    
    if not is_namespace_slice:
        
        return tag_slice
        
    
    namespace = tag_slice[ : -1 ]
    
    return '\'{}\' tags'.format( namespace )
|
|
|
|
|
|
def IsUnnamespaced( tag ):
    """Return True if SplitTag reports an empty namespace for tag."""
    
    ( namespace, subtag ) = SplitTag( tag )
    
    return namespace == ''
|
|
|
|
def SplitTag( tag ):
    """Split a tag into a ( namespace, subtag ) pair at its first colon.
    
    A tag with no colon comes back as ( '', tag ). Any colons after the first one stay in
    the subtag.
    """
    
    if ':' not in tag:
        
        return ( '', tag )
        
    
    ( namespace, subtag ) = tag.split( ':', 1 )
    
    return ( namespace, subtag )
|
|
|
|
|
|
# the NUL character, which StripTextOfGumpf removes from text
NULL_CHARACTER = '\x00'

def StripTextOfGumpf( t ):
    """Tidy a piece of tag text and return the result.
    
    In order: every match of HydrusText.re_newlines is removed, every match of
    HydrusText.re_multiple_spaces becomes a single space, surrounding whitespace is
    stripped, every match of HydrusText.re_leading_space_or_garbage is removed, and
    finally all NUL characters are removed.
    
    NOTE(review): what exactly the HydrusText patterns match is defined in HydrusText,
    not here. The names suggest newlines, runs of spaces and leading junk, but confirm there.
    """
    
    without_newlines = HydrusText.re_newlines.sub( '', t )
    
    single_spaced = HydrusText.re_multiple_spaces.sub( ' ', without_newlines )
    
    trimmed = single_spaced.strip()
    
    without_leading_gumpf = HydrusText.re_leading_space_or_garbage.sub( '', trimmed )
    
    # replace leaves the text as it was when there is no NUL character in it
    return without_leading_gumpf.replace( NULL_CHARACTER, '' )
|
|
|
|
def TagOK( t ):
    """Return True if t can be cleaned into a tag with a non-empty subtag, otherwise False.
    
    This never raises for a bad tag. Any Exception from CleanTag or CheckTagNotEmpty
    simply means 'not ok'.
    """
    
    try:
        
        CheckTagNotEmpty( CleanTag( t ) )
        
        return True
        
    except Exception:
        
        # Exception rather than a bare except, so KeyboardInterrupt and SystemExit still propagate
        return False
|
|
|
|
|
|
class TagFilter( HydrusSerialisable.SerialisableBase ):
    """A serialisable set of whitelist/blacklist rules that decides which tags are permitted.
    
    Rules are stored in a dict of tag slice -> rule, where rule is HC.FILTER_WHITELIST or
    HC.FILTER_BLACKLIST and a tag slice is one of:
    
        ''                   all unnamespaced tags
        ':'                  all namespaced tags
        'series:'            all tags in one namespace
        anything else        one specific tag
    
    A tag is permitted unless some blacklist rule covers it and no whitelist rule does.
    A whitelist rule always beats a blacklist rule.
    
    The public methods take self._lock. The underscore-prefixed methods do not, and expect
    the caller to have dealt with locking.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_TAG_FILTER
    SERIALISABLE_NAME = 'Tag Filter Rules'
    SERIALISABLE_VERSION = 1
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self._lock = threading.Lock()
        
        # the source of truth, tag slice -> rule
        self._tag_slices_to_rules = {}
        
        # everything below is a cache derived from the dict above, rebuilt by _UpdateRuleCache
        
        self._all_unnamespaced_whitelisted = False
        self._all_namespaced_whitelisted = False
        self._namespaces_whitelist = set()
        self._tags_whitelist = set()
        
        self._all_unnamespaced_blacklisted = False
        self._all_namespaced_blacklisted = False
        self._namespaces_blacklist = set()
        self._tags_blacklist = set()
        
        # whether any namespace-level ('', ':', 'series:') or tag-level rule exists at all
        self._namespaced_interesting = False
        self._tags_interesting = False
        
    
    def __eq__( self, other ):
        """Two tag filters are equal when they hold the same tag slice -> rule mapping."""
        
        if isinstance( other, TagFilter ):
            
            return self._tag_slices_to_rules == other._tag_slices_to_rules
            
        
        return NotImplemented
        
    
    @staticmethod
    def _JoinTagSlices( tag_slices ):
        """Return the human-readable descriptions of the given tag slices, comma separated."""
        
        return ', '.join( ConvertTagSliceToString( tag_slice ) for tag_slice in tag_slices )
        
    
    def _GetSortedBlacklistAndWhitelist( self ):
        """Return ( blacklist, whitelist ), two sorted lists of the tag slices under each rule.
        
        Does not take the lock.
        """
        
        blacklist = sorted( tag_slice for ( tag_slice, rule ) in self._tag_slices_to_rules.items() if rule == HC.FILTER_BLACKLIST )
        whitelist = sorted( tag_slice for ( tag_slice, rule ) in self._tag_slices_to_rules.items() if rule == HC.FILTER_WHITELIST )
        
        return ( blacklist, whitelist )
        
    
    def _IterateTagSlices( self, tag, apply_unnamespaced_rules_to_namespaced_tags ):
        """Yield every tag slice whose rule could apply to tag, most specific first.
        
        Yields the tag itself, then (if requested and different from the tag) its bare
        subtag, then 'namespace:' and ':' for a namespaced tag or '' for an unnamespaced one.
        """
        
        # this guy gets called a lot, so we are making it an iterator
        
        yield tag
        
        ( namespace, subtag ) = SplitTag( tag )
        
        if tag != subtag and apply_unnamespaced_rules_to_namespaced_tags:
            
            yield subtag
            
        
        if namespace != '':
            
            yield '{}:'.format( namespace )
            yield ':'
            
        else:
            
            yield ''
            
        
    
    def _GetSerialisableInfo( self ):
        """Return the rules as a list of ( tag_slice, rule ) pairs."""
        
        return list( self._tag_slices_to_rules.items() )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Load the rules from ( tag_slice, rule ) pairs and rebuild the caches."""
        
        self._tag_slices_to_rules = dict( serialisable_info )
        
        self._UpdateRuleCache()
        
    
    def _TagOK( self, tag, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """Return True if the rules permit tag. Does not take the lock.
        
        :param tag: the tag to test.
        :param apply_unnamespaced_rules_to_namespaced_tags: if True, a rule for the bare tag
            'table' is also applied to any 'namespace:table'.
        :return: True as soon as any whitelist rule covers the tag. Otherwise False if a
            blacklist rule covers it, and True if no rule covers it at all.
        """
        
        # since this is called a whole bunch and overhead piles up, the logic is splayed out into
        # hardcoded tests against the caches from _UpdateRuleCache, rather than iterating
        # _IterateTagSlices and looking each slice up in the rule dict
        
        blacklist_encountered = False
        
        if self._tags_interesting:
            
            if tag in self._tags_whitelist:
                
                return True
                
            
            if tag in self._tags_blacklist:
                
                blacklist_encountered = True
                
            
        
        if self._namespaced_interesting or apply_unnamespaced_rules_to_namespaced_tags:
            
            # only pay for the split when something below will actually use it
            ( namespace, subtag ) = SplitTag( tag )
            
            if apply_unnamespaced_rules_to_namespaced_tags and self._tags_interesting and subtag != tag:
                
                if subtag in self._tags_whitelist:
                    
                    return True
                    
                
                if subtag in self._tags_blacklist:
                    
                    blacklist_encountered = True
                    
                
            
            if self._namespaced_interesting:
                
                if namespace == '':
                    
                    if self._all_unnamespaced_whitelisted:
                        
                        return True
                        
                    
                    if self._all_unnamespaced_blacklisted:
                        
                        blacklist_encountered = True
                        
                    
                else:
                    
                    if self._all_namespaced_whitelisted or namespace in self._namespaces_whitelist:
                        
                        return True
                        
                    
                    if self._all_namespaced_blacklisted or namespace in self._namespaces_blacklist:
                        
                        blacklist_encountered = True
                        
                    
                
            
        
        # a rule against and no exceptions -> False; no rules against or explicitly for -> permitted
        return not blacklist_encountered
        
    
    def _UpdateRuleCache( self ):
        """Rebuild every cached flag and set from self._tag_slices_to_rules. Does not take the lock.
        
        Any rule value other than HC.FILTER_WHITELIST is filed as a blacklist rule.
        """
        
        self._all_unnamespaced_whitelisted = False
        self._all_namespaced_whitelisted = False
        self._namespaces_whitelist = set()
        self._tags_whitelist = set()
        
        self._all_unnamespaced_blacklisted = False
        self._all_namespaced_blacklisted = False
        self._namespaces_blacklist = set()
        self._tags_blacklist = set()
        
        self._namespaced_interesting = False
        self._tags_interesting = False
        
        for ( tag_slice, rule ) in self._tag_slices_to_rules.items():
            
            whitelisted = rule == HC.FILTER_WHITELIST
            
            if tag_slice == '':
                
                if whitelisted:
                    
                    self._all_unnamespaced_whitelisted = True
                    
                else:
                    
                    self._all_unnamespaced_blacklisted = True
                    
                
                self._namespaced_interesting = True
                
            elif tag_slice == ':':
                
                if whitelisted:
                    
                    self._all_namespaced_whitelisted = True
                    
                else:
                    
                    self._all_namespaced_blacklisted = True
                    
                
                self._namespaced_interesting = True
                
            elif tag_slice.count( ':' ) == 1 and tag_slice.endswith( ':' ):
                
                # 'series:' style slice, cached without its trailing colon
                namespace = tag_slice[:-1]
                
                if whitelisted:
                    
                    self._namespaces_whitelist.add( namespace )
                    
                else:
                    
                    self._namespaces_blacklist.add( namespace )
                    
                
                self._namespaced_interesting = True
                
            else:
                
                if whitelisted:
                    
                    self._tags_whitelist.add( tag_slice )
                    
                else:
                    
                    self._tags_blacklist.add( tag_slice )
                    
                
                self._tags_interesting = True
                
            
        
    
    def AllowsEverything( self ):
        """Return True if there is no blacklist rule at all."""
        
        with self._lock:
            
            return not any( rule == HC.FILTER_BLACKLIST for rule in self._tag_slices_to_rules.values() )
            
        
    
    def CleanRules( self ):
        """Run every rule's tag slice through CleanTag and rebuild the rules and caches.
        
        The special slices '' and ':' are kept as they are. A rule is dropped if its slice
        cannot be cleaned, or if cleaning reduces it to '' or ':', since it would otherwise
        silently turn into a blanket rule for all unnamespaced or all namespaced tags.
        """
        
        example_subtag = 'example'
        
        # taken for consistency with every other public method that touches the rules
        with self._lock:
            
            new_tag_slices_to_rules = {}
            
            for ( tag_slice, rule ) in self._tag_slices_to_rules.items():
                
                if tag_slice not in ( '', ':' ):
                    
                    is_namespace_slice = tag_slice.count( ':' ) == 1 and tag_slice.endswith( ':' )
                    
                    try:
                        
                        if is_namespace_slice:
                            
                            # CleanTag wants a whole tag, so clean 'series:example' and cut the example subtag back off
                            tag_slice = CleanTag( tag_slice + example_subtag )[ : - len( example_subtag ) ]
                            
                        else:
                            
                            tag_slice = CleanTag( tag_slice )
                            
                        
                    except Exception:
                        
                        continue
                        
                    
                    if tag_slice in ( '', ':' ):
                        
                        # nothing meaningful survived cleaning
                        continue
                        
                    
                
                new_tag_slices_to_rules[ tag_slice ] = rule
                
            
            self._tag_slices_to_rules = new_tag_slices_to_rules
            
            self._UpdateRuleCache()
            
        
    
    def Filter( self, tags, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """Return the set of the given tags that the rules permit. See _TagOK for the parameters."""
        
        with self._lock:
            
            return { tag for tag in tags if self._TagOK( tag, apply_unnamespaced_rules_to_namespaced_tags = apply_unnamespaced_rules_to_namespaced_tags ) }
            
        
    
    def GetTagSlicesToRules( self ):
        """Return a shallow copy of the tag slice -> rule dict."""
        
        with self._lock:
            
            return dict( self._tag_slices_to_rules )
            
        
    
    def SetRule( self, tag_slice, rule ):
        """Set or replace the rule for one tag slice and rebuild the caches."""
        
        with self._lock:
            
            self._tag_slices_to_rules[ tag_slice ] = rule
            
            self._UpdateRuleCache()
            
        
    
    def TagOK( self, tag, apply_unnamespaced_rules_to_namespaced_tags = False ):
        """Return True if the rules permit tag. This is the locking wrapper around _TagOK."""
        
        with self._lock:
            
            return self._TagOK( tag, apply_unnamespaced_rules_to_namespaced_tags = apply_unnamespaced_rules_to_namespaced_tags )
            
        
    
    def ToBlacklistString( self ):
        """Describe the rules in 'blacklisting on x except y' form."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'no blacklist set'
                
            
            if set( blacklist ) == { '', ':' }:
                
                text = 'blacklisting on any tags'
                
            else:
                
                text = 'blacklisting on ' + self._JoinTagSlices( blacklist )
                
            
            if len( whitelist ) > 0:
                
                text += ' except ' + self._JoinTagSlices( whitelist )
                
            
            return text
            
        
    
    def ToCensoredString( self ):
        """Describe the rules in 'all but x allowed except y' form."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'all tags allowed'
                
            
            if set( blacklist ) == { '', ':' }:
                
                text = 'no tags allowed'
                
            else:
                
                text = 'all but ' + self._JoinTagSlices( blacklist ) + ' allowed'
                
            
            if len( whitelist ) > 0:
                
                text += ' except ' + self._JoinTagSlices( whitelist )
                
            
            return text
            
        
    
    def ToPermittedString( self ):
        """Describe which tags get through, e.g. 'all tags', 'only x', 'all tags except x (except y)'."""
        
        with self._lock:
            
            ( blacklist, whitelist ) = self._GetSortedBlacklistAndWhitelist()
            
            if len( blacklist ) == 0:
                
                return 'all tags'
                
            
            blacklist_set = set( blacklist )
            
            if blacklist_set == { '', ':' }:
                
                # everything is blocked, so only the whitelist gets through
                
                if len( whitelist ) == 0:
                    
                    text = 'no tags'
                    
                else:
                    
                    text = 'only ' + self._JoinTagSlices( whitelist )
                    
                
            elif blacklist_set == { '' }:
                
                text = 'all namespaced tags'
                
                if len( whitelist ) > 0:
                    
                    text += ' and ' + self._JoinTagSlices( whitelist )
                    
                
            elif blacklist_set == { ':' }:
                
                text = 'all unnamespaced tags'
                
                if len( whitelist ) > 0:
                    
                    text += ' and ' + self._JoinTagSlices( whitelist )
                    
                
            else:
                
                text = 'all tags except ' + self._JoinTagSlices( blacklist )
                
                if len( whitelist ) > 0:
                    
                    text += ' (except ' + self._JoinTagSlices( whitelist ) + ')'
                    
                
            
            return text
|
|
|
|
|
|
|
|
# Register TagFilter against its serialisable type id in HydrusSerialisable's type -> class mapping.
# NOTE(review): presumably this is what lets a serialised tag filter be rebuilt as a TagFilter object - confirm in HydrusSerialisable.
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_TAG_FILTER ] = TagFilter
|
|
|