import collections
import HydrusConstants as HC
import itertools
import os
import threading
import time
import traceback
import HydrusData
import HydrusExceptions
import re
import HydrusGlobals

def CensorshipMatch( tag, censorship ):
    """Return True if tag is covered by the censorship pattern.
    
    The pattern is interpreted as follows:
    
    ':'       - matches every tag that contains a ':' (all namespaced tags)
    'xyz:...' - any other pattern containing ':' matches tags that start with it
    ''        - matches every tag that contains no ':' (all non namespaced tags)
    'table'   - matches the tag 'table' itself, or any tag whose text after
                its first ':' is exactly 'table'
    """
    
    if censorship == ':':
        
        return ':' in tag
        
    
    if ':' in censorship:
        
        return tag.startswith( censorship )
        
    
    if censorship == '':
        
        return ':' not in tag
        
    
    # plain pattern: compare against the part of the tag after its first ':'
    if ':' in tag:
        
        subtag = tag.split( ':', 1 )[1]
        
    else:
        
        subtag = tag
        
    
    return subtag == censorship
    

def ConvertTagToSortable( t ):
    """Return a sort key for the tag t.
    
    A tag that starts with a digit becomes ( int, rest_of_tag ) so that:
    
    0 < 0a < 0b < 1 ( lexicographic comparison of the remainder )
    -and-
    2 < 22 ( value comparison of the leading number )
    
    Any other tag, including the empty string, is returned unchanged.
    """
    
    # slice rather than index so an empty tag does not raise IndexError
    if not t[:1].isdigit():
        
        return t
        
    
    # count the leading run of digit characters
    i = 0
    
    for character in t:
        
        if not character.isdigit():
            
            break
            
        
        i += 1
        
    
    try:
        
        number = int( t[:i] )
        
    except ValueError:
        
        # isdigit() is True for some characters int() cannot parse
        # (e.g. superscript digits), so fall back to plain string ordering
        return t
        
    
    # NOTE(review): digit-led tags yield a tuple while all others yield a
    # string. Sorting a mix of both relies on Python 2's cross-type ordering;
    # Python 3 raises TypeError for that comparison - confirm before porting.
    return ( number, t[i:] )
    

def FilterNamespaces( tags, namespaces ):
    """Return the set of tags whose namespace is listed in namespaces.
    
    A tag's namespace is the text before its first ':'. Tags without a ':' are
    grouped under '', and a namespace of either '' or None selects that group.
    The full tag, namespace included, is what gets returned.
    """
    
    tags_by_namespace = collections.defaultdict( set )
    
    for tag in tags:
        
        if ':' in tag:
            
            namespace = tag.split( ':', 1 )[0]
            
        else:
            
            namespace = ''
            
        
        tags_by_namespace[ namespace ].add( tag )
        
    
    result = set()
    
    for namespace in namespaces:
        
        if namespace is None:
            
            namespace = ''
            
        
        # .get() so that asking for an absent namespace does not insert a key
        result.update( tags_by_namespace.get( namespace, () ) )
        
    
    return result
    

def SortTags( tags ):
    """Return a new list of tags ordered by ConvertTagToSortable."""
    
    return sorted( tags, key = ConvertTagToSortable )
    

def CheckTagNotEmpty( tag ):
    """Raise HydrusExceptions.SizeException if the tag has no content.
    
    A tag is empty when it is '' or when nothing follows its first ':'.
    """
    
    if ':' in tag:
        
        subtag = tag.split( ':', 1 )[1]
        
    else:
        
        subtag = tag
        
    
    if subtag == '':
        
        raise HydrusExceptions.SizeException( 'Received a zero-length tag!' )
        
    

def CleanTag( tag ):
    """Return a normalised copy of tag.
    
    The tag is cut to 1024 characters, lowercased and passed through
    HydrusData.ToUnicode. Carriage returns and line feeds are removed, runs of
    whitespace become a single space, one trailing whitespace character is
    dropped, and any leading whitespace, '-' or 'system:' prefixes are stripped
    repeatedly until none remain.
    
    Raises Exception, with the offending tag and the original error text in
    the message, if any step fails.
    """
    
    try:
        
        tag = tag[:1024]
        
        tag = tag.lower()
        
        tag = HydrusData.ToUnicode( tag )
        
        # str.replace returns a new string, so the result has to be kept;
        # without the assignment the line breaks survived and were turned
        # into spaces by the whitespace collapse below
        tag = tag.replace( '\r', '' )
        tag = tag.replace( '\n', '' )
        
        tag = re.sub( '[\\s]+', ' ', tag, flags = re.UNICODE ) # turns multiple spaces into single spaces
        
        tag = re.sub( '\\s\\Z', '', tag, flags = re.UNICODE ) # removes space at the end
        
        # loop because prefixes can be stacked, e.g. '-system:foo'
        while re.match( '\\s|-|system:', tag, flags = re.UNICODE ) is not None:
            
            tag = re.sub( '\\A(\\s|-|system:)', '', tag, flags = re.UNICODE ) # removes spaces or garbage at the beginning
            
        
    except Exception as e:
        
        text = 'Was unable to parse the tag: ' + HydrusData.ToUnicode( tag )
        text += os.linesep * 2
        text += str( e )
        
        raise Exception( text )
        
    
    return tag
    

def CleanTags( tags ):
    """Return the set of cleaned tags, leaving out any that end up empty.
    
    Every tag is run through CleanTag; results that CheckTagNotEmpty rejects
    are skipped rather than reported.
    """
    
    clean_tags = set()
    
    for tag in tags:
        
        tag = CleanTag( tag )
        
        try:
            
            CheckTagNotEmpty( tag )
            
        except HydrusExceptions.SizeException:
            
            continue
            
        
        clean_tags.add( tag )
        
    
    return clean_tags
    