2013-07-17 20:56:13 +00:00
|
|
|
import collections
|
|
|
|
import os
|
2015-03-25 22:04:19 +00:00
|
|
|
import re
|
2020-07-29 20:52:44 +00:00
|
|
|
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
|
|
from hydrus.core import HydrusExceptions
|
2020-04-22 21:00:35 +00:00
|
|
|
from hydrus.core import HydrusText
|
2017-04-05 21:16:40 +00:00
|
|
|
|
2016-04-06 19:52:45 +00:00
|
|
|
def CensorshipMatch( tag, censorships ):
    """
    Return True as soon as the tag is caught by one of the censorship rules.

    Recognised rule shapes:
        ''                  - every unnamespaced tag
        ':'                 - every namespaced tag
        'series:'           - every tag in that namespace
        'series:evangelion' - that exact tag and nothing else
        'table'             - any tag whose subtag is 'table', namespaced or not

    Returns False when no rule matches, including when there are no rules.
    """
    for rule in censorships:

        if ':' in rule and not rule.endswith( ':' ):

            # 'series:evangelion' - the whole tag has to be identical
            hit = tag == rule

        else:

            ( namespace, subtag ) = SplitTag( tag )

            if rule == '':

                hit = namespace == ''

            elif rule == ':':

                hit = namespace != ''

            elif rule.endswith( ':' ):

                # 'series:' - compare against the rule minus its trailing colon
                hit = namespace == rule[:-1]

            else:

                # 'table' - plain rule, the namespace of the tag is ignored
                hit = subtag == rule

        if hit:

            return True

    return False
|
|
|
|
|
2018-02-28 22:30:36 +00:00
|
|
|
def CollapseMultipleSortedNumericTagsToMinMax( tags ):
    """
    Reduce an already-sorted run of numeric tags to just its two endpoints.

    The caller wants to present something like 1, 2, 3, 4, 5 as 1-5, so when
    there are more than two tags and every one of them starts with a decimal
    digit, only the first and the last are returned (as a new list).

    In every other case (two or fewer tags, or at least one tag that does not
    start with a digit) the input object is returned untouched.
    """
    if len( tags ) <= 2:

        return tags

    # A tag counts as numeric when it leads with a decimal digit. This used to
    # be detected with isinstance( ConvertTagToSortable( tag ), tuple ), but
    # that function returns a tuple for every tag, so the guard never fired and
    # purely textual lists were being collapsed as well.
    if not all( tag[:1].isdecimal() for tag in tags ):

        return tags

    if not isinstance( tags, list ):

        # make sure the collection can be indexed from both ends
        tags = list( tags )

    return [ tags[0], tags[-1] ]
|
|
|
|
|
|
|
|
|
|
|
|
|
2018-09-12 21:36:26 +00:00
|
|
|
def ConvertTagToSortable( tag ):
    """
    Build a sort key that orders tags the way a human would, e.g. '2' before '10'.

    The lowercased tag is cut into alternating runs of non-digits and ASCII
    digits, and every run becomes a ( text, number ) pair:
        digit run -> ( '', value of the run as an int )
        other run -> ( run, 0 )

    Returns a tuple of those pairs, suitable as a key for sort/sorted.
    """
    # the original note here says this copies the human sort in hydrusdata

    def convert( chunk ):

        # the '' text part makes a number sort ahead of any non-empty text in
        # the same position, and numbers then compare by value, not by digits
        if chunk.isdecimal():

            return ( '', int( chunk ) )

        return ( chunk, 0 )

    # the capture group keeps the digit runs in the split result
    return tuple( convert( chunk ) for chunk in re.split( r'([0-9]+)', tag.lower() ) )
|
2015-02-03 20:40:21 +00:00
|
|
|
|
2014-11-12 23:33:13 +00:00
|
|
|
def FilterNamespaces( tags, namespaces ):
    """
    Return the set of tags whose namespace is one of the requested namespaces.

    tags is any iterable of tag strings; each is split with SplitTag to find
    its namespace. namespaces is an iterable of namespace strings, where both
    '' and None select the unnamespaced tags.
    """
    tags_by_namespace = collections.defaultdict( set )

    for tag in tags:

        ( namespace, subtag ) = SplitTag( tag )

        tags_by_namespace[ namespace ].add( tag )

    result = set()

    for namespace in namespaces:

        # None is accepted as an alias for the empty namespace
        key = '' if namespace is None else namespace

        # .get avoids growing the defaultdict for namespaces that have no tags
        result.update( tags_by_namespace.get( key, () ) )

    return result
|
|
|
|
|
2016-04-27 19:20:37 +00:00
|
|
|
def SortNumericTags( tags ):
    """
    Return a new list holding the given tags in human/numeric order.

    Ordering is decided by the key ConvertTagToSortable produces for each tag;
    the input iterable itself is left as it was.
    """
    return sorted( tags, key = ConvertTagToSortable )
|
|
|
|
|
2015-03-25 22:04:19 +00:00
|
|
|
def CheckTagNotEmpty( tag ):
    """
    Raise HydrusExceptions.TagSizeException when the subtag part of the tag is empty.

    Returns None when the subtag has content.
    """
    ( _, subtag ) = SplitTag( tag )

    if subtag != '':

        return

    raise HydrusExceptions.TagSizeException( 'Received a zero-length tag!' )
|
2015-03-25 22:04:19 +00:00
|
|
|
|
2017-04-05 21:16:40 +00:00
|
|
|
|
2015-03-25 22:04:19 +00:00
|
|
|
def CleanTag( tag ):
    """
    Normalise a raw tag string and return the cleaned version.

    The tag is cut to its first 1024 characters, lowercased, and has a single
    leading colon doubled. It is then passed through StripTextOfGumpf; when it
    contains a colon it is additionally split into namespace and subtag, each
    part is stripped on its own, and the two are recombined with CombineTag.

    Raises Exception if tag is None or if any step fails. The message names
    the offending tag and the underlying error, and the underlying error is
    attached as the cause.
    """
    try:

        if tag is None:

            raise Exception()

        tag = tag[:1024]

        tag = tag.lower()

        tag = HydrusText.re_leading_single_colon.sub( '::', tag ) # Convert anything starting with one colon to start with two i.e. :D -> ::D

        if ':' in tag:

            tag = StripTextOfGumpf( tag ) # need to repeat here to catch 'system:' stuff

            ( namespace, subtag ) = SplitTag( tag )

            namespace = StripTextOfGumpf( namespace )
            subtag = StripTextOfGumpf( subtag )

            tag = CombineTag( namespace, subtag )

        else:

            tag = StripTextOfGumpf( tag )

    except Exception as e:

        text = 'Was unable to parse the tag: ' + str( tag )
        text += os.linesep * 2
        text += str( e )

        # chain the original error so its type and traceback are not thrown away
        raise Exception( text ) from e

    return tag
|
|
|
|
|
|
|
|
def CleanTags( tags ):
    """
    Clean every tag in the iterable and return the survivors as a set.

    None entries are skipped, and so is any tag for which CheckTagNotEmpty
    raises TagSizeException after cleaning. Errors raised by CleanTag itself
    are not caught here.
    """
    cleaned = set()

    for raw_tag in tags:

        if raw_tag is not None:

            candidate = CleanTag( raw_tag )

            try:

                CheckTagNotEmpty( candidate )

            except HydrusExceptions.TagSizeException:

                # nothing left of this one after cleaning, drop it
                continue

            cleaned.add( candidate )

    return cleaned
|
|
|
|
|
2017-02-08 22:27:00 +00:00
|
|
|
def CombineTag( namespace, subtag ):
    """
    Join a namespace and a subtag back into a single tag string.

    With a non-empty namespace the result is 'namespace:subtag'. With an empty
    namespace the subtag is returned on its own, except that a subtag matched
    by HydrusText.re_leading_single_colon gets one extra ':' put in front.
    """
    if namespace != '':

        return namespace + ':' + subtag

    needs_extra_colon = HydrusText.re_leading_single_colon.search( subtag ) is not None

    if needs_extra_colon:

        return ':' + subtag

    return subtag
|
2015-12-09 23:16:41 +00:00
|
|
|
|
|
|
|
|
2017-02-08 22:27:00 +00:00
|
|
|
def SplitTag( tag ):
    """
    Split a tag into a ( namespace, subtag ) tuple at its first colon.

    A tag without any colon comes back as ( '', tag ). Only the first colon
    separates; any later colons stay inside the subtag.
    """
    if ':' not in tag:

        return ( '', tag )

    ( namespace, _, subtag ) = tag.partition( ':' )

    return ( namespace, subtag )
|
2017-02-08 22:27:00 +00:00
|
|
|
|
|
|
|
|
2017-04-05 21:16:40 +00:00
|
|
|
def StripTextOfGumpf( t ):
    """
    Tidy a piece of tag text and return the result.

    Applied in order: HydrusText.re_newlines matches are removed,
    HydrusText.re_multiple_spaces matches become a single space, surrounding
    whitespace is stripped, and HydrusText.re_leading_space_or_garbage matches
    are removed.
    """
    without_newlines = HydrusText.re_newlines.sub( '', t )

    single_spaced = HydrusText.re_multiple_spaces.sub( ' ', without_newlines )

    trimmed = single_spaced.strip()

    return HydrusText.re_leading_space_or_garbage.sub( '', trimmed )
|
|
|
|
|
2020-03-04 22:12:53 +00:00
|
|
|
def TagOK( t ):
    """
    Report whether t survives cleaning as a usable tag.

    Returns True when CleanTag succeeds and CheckTagNotEmpty accepts the
    result, False when either of them raises.
    """
    try:

        CheckTagNotEmpty( CleanTag( t ) )

    except Exception:

        # this was a bare except, which also swallowed KeyboardInterrupt and
        # SystemExit; those now propagate instead of being reported as a bad tag
        return False

    return True
|
|
|
|
|
|
|
|
|