157 lines
3.6 KiB
Python
157 lines
3.6 KiB
Python
import collections
|
|
import HydrusConstants as HC
|
|
import itertools
|
|
import os
|
|
import threading
|
|
import time
|
|
import traceback
|
|
import HydrusData
|
|
import HydrusExceptions
|
|
import re
|
|
import HydrusGlobals
|
|
|
|
def CensorshipMatch( tag, censorship ):
    """Return True if tag is covered by the given censorship rule.

    Rules, as implemented below:
      ':'          - matches every namespaced tag
      'namespace:' - (any rule containing ':') matches tags that start with it
      ''           - matches every tag without a namespace
      'table'      - matches that tag, with or without a namespace in front
    """
    
    if censorship == ':':
        
        # ':' - all namespaced tags
        return ':' in tag
        
    
    if ':' in censorship:
        
        return tag.startswith( censorship )
        
    
    if censorship == '':
        
        # '' - all non namespaced tags
        return ':' not in tag
        
    
    # 'table' - normal tag, or namespaced version of same
    # splitting once and taking the last piece drops the namespace if there is one
    subtag = tag.split( ':', 1 )[-1]
    
    return subtag == censorship
|
|
|
|
|
|
|
|
def ConvertTagToSortable( t ):
    """Return a sort key for tag t that compares a leading number by value.

    If t starts with a run of digits that int() can parse, the key is the
    tuple ( int_value, rest_of_tag ). Otherwise the key is t itself. This
    includes the empty tag and tags whose leading "digits" are characters
    that isdigit() accepts but int() rejects.
    """
    
    # We want to maintain that:
    # 0 < 0a < 0b < 1 ( lexicographic comparison )
    # -and-
    # 2 < 22 ( value comparison )
    # So, if the first bit can be turned into an int, split it into ( int, extra )
    
    digit_count = 0
    
    for character in t:
        
        if not character.isdigit(): break
        
        digit_count += 1
        
    
    # no leading digits at all - this also covers the empty tag, which has no first character to inspect
    if digit_count == 0: return t
    
    try:
        
        int_component = int( t[ : digit_count ] )
        
    except ValueError:
        
        # isdigit() is true for some characters ( e.g. superscript digits ) that int() cannot parse
        # fall back to plain string ordering rather than failing the whole sort
        return t
        
    
    return ( int_component, t[ digit_count : ] )
|
|
|
|
def FilterNamespaces( tags, namespaces ):
    """Return the set of tags whose namespace is one of namespaces.

    Tags are returned whole ( namespace included ). A namespace of '' or None
    selects the tags that have no ':' in them.
    """
    
    tags_by_namespace = collections.defaultdict( set )
    
    for tag in tags:
        
        # partition splits on the first ':' only; separator is '' when there is none
        ( prefix, separator, remainder ) = tag.partition( ':' )
        
        key = prefix if separator else ''
        
        tags_by_namespace[ key ].add( tag )
        
    
    selected = set()
    
    for namespace in namespaces:
        
        if namespace in ( '', None ):
            
            selected |= tags_by_namespace[ '' ]
            
        
        selected |= tags_by_namespace[ namespace ]
        
    
    return selected
|
|
|
|
def SortTags( tags ):
    """Return a new list of tags ordered by their ConvertTagToSortable key."""
    
    return sorted( tags, key = ConvertTagToSortable )
|
|
|
|
def CheckTagNotEmpty( tag ):
    """Raise HydrusExceptions.SizeException if tag is empty.

    A tag counts as empty when it is the empty string, or when it contains a
    ':' and everything after the first ':' is the empty string.
    """
    
    # for a namespaced tag, the part after the first ':' is the subtag
    if tag == '' or ( ':' in tag and tag.split( ':', 1 )[1] == '' ):
        
        raise HydrusExceptions.SizeException( 'Received a zero-length tag!' )
|
|
|
|
def CleanTag( tag ):
    """Normalise a raw tag and return the cleaned text.

    Steps, in order: truncate to 1024 characters, lowercase, convert with
    HydrusData.ToUnicode, delete carriage returns and line feeds, collapse
    each run of whitespace to a single space, drop one trailing whitespace
    character, then repeatedly strip a leading whitespace character, '-' or
    'system:'.

    Raises Exception ( with the tag and the underlying error in the message )
    if any of those steps fails.
    """
    
    try:
        
        tag = tag[:1024]
        
        tag = tag.lower()
        
        tag = HydrusData.ToUnicode( tag )
        
        # replace() returns a new string, so the result must be assigned back
        # without the assignment the line breaks survived and became spaces in the step below
        tag = tag.replace( '\r', '' )
        tag = tag.replace( '\n', '' )
        
        tag = re.sub( '[\\s]+', ' ', tag, flags = re.UNICODE ) # turns multiple spaces into single spaces
        
        tag = re.sub( '\\s\\Z', '', tag, flags = re.UNICODE ) # removes space at the end
        
        while re.match( '\\s|-|system:', tag, flags = re.UNICODE ) is not None:
            
            tag = re.sub( '\\A(\\s|-|system:)', '', tag, flags = re.UNICODE ) # removes spaces or garbage at the beginning
            
        
    except Exception as e:
        
        text = 'Was unable to parse the tag: ' + HydrusData.ToUnicode( tag )
        text += os.linesep * 2
        text += str( e )
        
        raise Exception( text )
        
    
    return tag
|
|
|
|
def CleanTags( tags ):
    """Return the set of cleaned tags, leaving out any that clean down to empty.

    Each tag is passed through CleanTag, then CheckTagNotEmpty. A tag for
    which CheckTagNotEmpty raises HydrusExceptions.SizeException is skipped.
    """
    
    kept = set()
    
    for raw_tag in tags:
        
        cleaned = CleanTag( raw_tag )
        
        try:
            
            CheckTagNotEmpty( cleaned )
            
        except HydrusExceptions.SizeException:
            
            # nothing left after cleaning, so leave it out
            continue
            
        else:
            
            kept.add( cleaned )
            
        
    
    return kept
|
|
|