import collections
import HydrusConstants as HC
import itertools
import os
import threading
import time
import traceback
import HydrusData
import HydrusExceptions
import re
import HydrusGlobals as HG
import HydrusText

def CensorshipMatch( tag, censorships ):
    """Return True if tag matches any of the given censorship rules.
    
    Each censorship is a string interpreted as one of:
    
    ''                  - matches every tag that has no namespace
    ':'                 - matches every tag that has a namespace
    'series:'           - matches every tag in the 'series' namespace
    'series:evangelion' - matches exactly that namespaced tag
    'table'             - matches any tag whose subtag is 'table', namespaced or not
    """
    
    # the split does not depend on the censorship, so do it once up front
    ( namespace, subtag ) = SplitTag( tag )
    
    for censorship in censorships:
        
        if censorship == '':
            
            # '' - all non namespaced tags
            if namespace == '':
                
                return True
                
            
        elif censorship == ':':
            
            # ':' - all namespaced tags
            if namespace != '':
                
                return True
                
            
        elif ':' in censorship:
            
            if censorship.endswith( ':' ):
                
                # 'series:' - namespaced tags
                if namespace == censorship[:-1]:
                    
                    return True
                    
                
            else:
                
                # 'series:evangelion' - exact match with namespace
                if tag == censorship:
                    
                    return True
                    
                
            
        else:
            
            # 'table' - normal tag, or namespaced version of same
            if subtag == censorship:
                
                return True
                
            
        
    
    return False
    
def CollapseMultipleSortedNumericTagsToMinMax( tags ):
    """Reduce an already-sorted, all-numeric collection of tags to [ first, last ].
    
    If there are two or fewer tags, or any tag does not start with a decimal
    digit (i.e. ConvertTagToSortable does not give a tuple for it), the input
    is returned unchanged.
    """
    
    if len( tags ) <= 2:
        
        return tags
        
    
    if any( not isinstance( ConvertTagToSortable( tag ), tuple ) for tag in tags ):
        
        return tags
        
    
    # this list of tags is entirely numeric and may well be something like 1, 2, 3, 4, 5
    # the caller wants to present 1-5 instead, so lets cut out the first and last
    
    if not isinstance( tags, list ):
        
        tags = list( tags )
        
    
    return [ tags[0], tags[-1] ]
    
def ConvertTagToSortable( t ):
    """Return a sort key for t.
    
    If t starts with a decimal digit, the key is ( int_prefix, remainder ),
    otherwise it is t itself. We want to maintain that:
    
    0 < 0a < 0b < 1 ( lexicographic comparison )
    -and-
    2 < 22 ( value comparison )
    """
    
    if len( t ) == 0 or not t[0].isdecimal():
        
        return t
        
    
    # count the leading run of decimal characters, then split there
    num_leading_digits = 0
    
    for character in t:
        
        if not character.isdecimal():
            
            break
            
        
        num_leading_digits += 1
        
    
    number = int( t[ : num_leading_digits ] )
    str_component = t[ num_leading_digits : ]
    
    return ( number, str_component )
    
def FilterNamespaces( tags, namespaces ):
    """Return the set of tags whose namespace is one of the given namespaces.
    
    A namespace of None in namespaces selects the unnamespaced tags (the same
    as '').
    """
    
    wanted_namespaces = set( '' if namespace is None else namespace for namespace in namespaces )
    
    return set( tag for tag in tags if SplitTag( tag )[0] in wanted_namespaces )
    
def SortNumericTags( tags ):
    """Return a new list of tags sorted by their ConvertTagToSortable key."""
    
    return sorted( tags, key = ConvertTagToSortable )
    
def CheckTagNotEmpty( tag ):
    """Raise HydrusExceptions.SizeException if the subtag part of tag is empty."""
    
    ( namespace, subtag ) = SplitTag( tag )
    
    if subtag == '':
        
        raise HydrusExceptions.SizeException( 'Received a zero-length tag!' )
        
    
def CleanTag( tag ):
    """Normalise a single tag and return it.
    
    The tag is truncated to 1024 characters, lowercased, passed through
    HydrusData.ToUnicode, and stripped with StripTextOfGumpf; a namespaced tag
    has its namespace and subtag stripped separately and is then recombined.
    
    Any failure, including tag being None, is re-raised as a plain Exception
    whose message contains the tag and the text of the original error.
    """
    
    try:
        
        if tag is None:
            
            raise Exception()
            
        
        tag = tag[:1024]
        
        tag = tag.lower()
        
        tag = HydrusData.ToUnicode( tag )
        
        # Convert anything starting with one colon to start with two i.e. :D -> ::D
        tag = HydrusText.re_leading_single_colon.sub( '::', tag )
        
        if ':' in tag:
            
            tag = StripTextOfGumpf( tag ) # need to repeat here to catch 'system:' stuff
            
            ( namespace, subtag ) = SplitTag( tag )
            
            namespace = StripTextOfGumpf( namespace )
            subtag = StripTextOfGumpf( subtag )
            
            tag = CombineTag( namespace, subtag )
            
        else:
            
            tag = StripTextOfGumpf( tag )
            
        
    except Exception as e:
        
        text = 'Was unable to parse the tag: ' + HydrusData.ToUnicode( tag )
        text += os.linesep * 2
        text += HydrusData.ToUnicode( e )
        
        raise Exception( text )
        
    
    return tag
    
def CleanTags( tags ):
    """Clean every tag with CleanTag and return the results as a set.
    
    None entries are skipped, as are tags whose subtag is empty after
    cleaning. Exceptions raised by CleanTag are not caught here.
    """
    
    clean_tags = set()
    
    for tag in tags:
        
        if tag is None:
            
            continue
            
        
        tag = CleanTag( tag )
        
        try:
            
            CheckTagNotEmpty( tag )
            
        except HydrusExceptions.SizeException:
            
            # nothing left of this tag after cleaning, so drop it
            continue
            
        
        clean_tags.add( tag )
        
    
    return clean_tags
    
def CombineTag( namespace, subtag ):
    """Join namespace and subtag into a single tag string.
    
    With an empty namespace the subtag is returned on its own, with one extra
    ':' prepended if it matches HydrusText.re_leading_single_colon.
    """
    
    if namespace != '':
        
        return namespace + ':' + subtag
        
    
    if HydrusText.re_leading_single_colon.search( subtag ) is not None:
        
        return ':' + subtag
        
    
    return subtag
    
def SplitTag( tag ):
    """Split tag at its first ':' and return ( namespace, subtag ).
    
    A tag with no ':' gives ( '', tag ). The result is always a 2-tuple.
    """
    
    if ':' in tag:
        
        # str.split returns a list; convert so both branches return the same type
        return tuple( tag.split( ':', 1 ) )
        
    else:
        
        return ( '', tag )
        
    
def StripTextOfGumpf( t ):
    """Apply the HydrusText newline, multiple-space, trailing-space and
    leading-space-or-garbage substitutions to t, in that order, and return it.
    """
    
    t = HydrusText.re_newlines.sub( '', t )
    
    t = HydrusText.re_multiple_spaces.sub( ' ', t )
    
    t = HydrusText.re_trailing_space.sub( '', t )
    
    t = HydrusText.re_leading_space_or_garbage.sub( '', t )
    
    return t