hydrus/include/HydrusTags.py

import collections
import HydrusConstants as HC
import itertools
import os
import threading
import time
import traceback
import HydrusData
import HydrusExceptions
import re
import HydrusGlobals as HG
import HydrusText

def CensorshipMatch( tag, censorships ):
    """Return True if tag is matched by at least one rule in censorships.
    
    Each rule is a string in one of these forms:
    
    ''                  - matches every tag that has no namespace
    ':'                 - matches every tag that has a namespace
    'series:'           - matches every tag in that namespace
    'series:evangelion' - matches exactly that full tag
    'table'             - matches that subtag, with or without a namespace
    
    Rules are tried in order and evaluation stops at the first hit.
    """
    
    def rule_hits( rule ):
        
        if rule == '':
            
            # all non namespaced tags
            return SplitTag( tag )[0] == ''
            
        
        if rule == ':':
            
            # all namespaced tags
            return SplitTag( tag )[0] != ''
            
        
        if ':' not in rule:
            
            # plain subtag rule: normal tag, or namespaced version of same
            return SplitTag( tag )[1] == rule
            
        
        if rule.endswith( ':' ):
            
            # whole-namespace rule, compare against the rule minus its trailing colon
            return SplitTag( tag )[0] == rule[:-1]
            
        
        # fully qualified rule: exact match with namespace
        return tag == rule
        
    
    return any( rule_hits( rule ) for rule in censorships )
    
def CollapseMultipleSortedNumericTagsToMinMax( tags ):
    """Reduce an already-sorted, all-numeric run of tags to its two endpoints.
    
    tags is returned untouched when it holds two or fewer items, or when any
    tag is non-numeric (i.e. ConvertTagToSortable does not give back a tuple).
    Otherwise a new two-item list [ first, last ] is returned so the caller
    can present something like 1-5 instead of 1, 2, 3, 4, 5.
    """
    
    if len( tags ) <= 2:
        
        return tags
        
    
    for tag in tags:
        
        if not isinstance( ConvertTagToSortable( tag ), tuple ):
            
            # at least one non-numeric tag, so there is no range to collapse
            return tags
            
        
    
    # indexing needs a sequence, so copy anything that is not already a list
    ordered = tags if isinstance( tags, list ) else list( tags )
    
    return [ ordered[0], ordered[-1] ]
    
def ConvertTagToSortable( t ):
    """Build a sort key for a tag so that leading numbers compare by value.
    
    When t starts with a decimal digit, the leading run of digits is parsed
    as an int and the result is ( number, remainder_of_string ). This keeps:
    
    0 < 0a < 0b < 1 ( lexicographic comparison on the remainder )
    2 < 22 ( value comparison on the number )
    
    Any other t, including the empty string, is returned unchanged.
    """
    
    if len( t ) == 0 or not t[0].isdecimal():
        
        return t
        
    
    # walk forward to the first character that is not a decimal digit
    split_index = 0
    
    while split_index < len( t ) and t[ split_index ].isdecimal():
        
        split_index += 1
        
    
    return ( int( t[ : split_index ] ), t[ split_index : ] )
        

def FilterNamespaces( tags, namespaces ):
    """Return the set of tags whose namespace is one of namespaces.
    
    tags is an iterable of tag strings. namespaces is an iterable of
    namespace strings; an entry of None (or '') selects tags that have no
    namespace. The result is always a new set.
    """
    
    # None is the caller-facing spelling of 'no namespace', which SplitTag reports as ''
    wanted_namespaces = set()
    
    for namespace in namespaces:
        
        wanted_namespaces.add( '' if namespace is None else namespace )
        
    
    return { tag for tag in tags if SplitTag( tag )[0] in wanted_namespaces }
    
def SortNumericTags( tags ):
    
    tags = list( tags )
    
    tags.sort( key = ConvertTagToSortable )
    
    return tags
    
def CheckTagNotEmpty( tag ):
    """Raise HydrusExceptions.SizeException if the subtag part of tag is empty.
    
    Only the subtag is inspected, so a tag that is nothing but a namespace
    and a colon is rejected as well. Returns None when the tag is acceptable.
    """
    
    subtag = SplitTag( tag )[1]
    
    subtag_is_empty = subtag == ''
    
    if subtag_is_empty:
        
        raise HydrusExceptions.SizeException( 'Received a zero-length tag!' )
        
    
def CleanTag( tag ):
    """Normalise a single raw tag and return the cleaned text.
    
    The tag is cut to its first 1024 characters, lowercased, passed through
    HydrusData.ToUnicode, has a single leading colon doubled ( :D -> ::D ),
    and is then stripped with StripTextOfGumpf. If it contained a colon
    before stripping, the namespace and subtag are also stripped separately
    and joined back together with CombineTag.
    
    Any failure along the way, including tag being None, is re-raised as a
    plain Exception whose message contains the tag and the original error.
    """
    
    try:
        
        if tag is None:
            
            raise Exception()
            
        
        tag = tag[:1024]
        
        tag = tag.lower()
        
        tag = HydrusData.ToUnicode( tag )
        
        # Convert anything starting with one colon to start with two i.e. :D -> ::D
        tag = HydrusText.re_leading_single_colon.sub( '::', tag )
        
        # decide on namespace handling before stripping, as the strip may alter the text
        had_colon = ':' in tag
        
        # for colon tags this whole-tag pass is needed to catch 'system:' stuff
        tag = StripTextOfGumpf( tag )
        
        if had_colon:
            
            pieces = SplitTag( tag )
            
            clean_namespace = StripTextOfGumpf( pieces[0] )
            clean_subtag = StripTextOfGumpf( pieces[1] )
            
            tag = CombineTag( clean_namespace, clean_subtag )
            
        
    except Exception as e:
        
        message_parts = [ 'Was unable to parse the tag: ' + HydrusData.ToUnicode( tag ) ]
        message_parts.append( os.linesep * 2 )
        message_parts.append( HydrusData.ToUnicode( e ) )
        
        raise Exception( ''.join( message_parts ) )
        
    
    return tag

def CleanTags( tags ):
    """Clean every tag in tags and return the usable results as a set.
    
    None entries are skipped, each remaining entry goes through CleanTag,
    and any cleaned tag that CheckTagNotEmpty rejects is dropped.
    """
    
    cleaned = set()
    
    for raw_tag in tags:
        
        if raw_tag is not None:
            
            candidate = CleanTag( raw_tag )
            
            try:
                
                CheckTagNotEmpty( candidate )
                
            except HydrusExceptions.SizeException:
                
                # nothing left after cleaning, so leave it out of the result
                pass
                
            else:
                
                cleaned.add( candidate )
                
            
        
    
    return cleaned
    
def CombineTag( namespace, subtag ):
    """Join a namespace and a subtag back into one tag string.
    
    With a non-empty namespace the result is namespace + ':' + subtag. With
    an empty namespace the subtag is returned as is, unless it matches
    HydrusText.re_leading_single_colon, in which case one extra ':' is put
    in front of it.
    """
    
    if namespace != '':
        
        return namespace + ':' + subtag
        
    
    if HydrusText.re_leading_single_colon.search( subtag ) is None:
        
        return subtag
        
    
    return ':' + subtag
        
    
def SplitTag( tag ):
    """Split a tag into ( namespace, subtag ) at its first colon.
    
    A tag without a colon has the empty string as its namespace. Only the
    first colon separates, so any later colons stay in the subtag.
    
    The result is always a 2-tuple, so callers get the same type whether or
    not the tag is namespaced.
    """
    
    ( namespace, separator, subtag ) = tag.partition( ':' )
    
    if separator == '':
        
        # no colon present, so the whole text is the subtag
        return ( '', tag )
        
    
    return ( namespace, subtag )
        
    
def StripTextOfGumpf( t ):
    """Apply the HydrusText cleanup regexes to t, in a fixed order, and return the result.
    
    In sequence: re_newlines matches are removed, re_multiple_spaces matches
    become a single space, then re_trailing_space and
    re_leading_space_or_garbage matches are removed.
    """
    
    # each step sees the output of the one before it, so the order matters
    substitutions = (
        ( HydrusText.re_newlines, '' ),
        ( HydrusText.re_multiple_spaces, ' ' ),
        ( HydrusText.re_trailing_space, '' ),
        ( HydrusText.re_leading_space_or_garbage, '' )
    )
    
    for ( pattern, replacement ) in substitutions:
        
        t = pattern.sub( replacement, t )
        
    
    return t
Version 77 2013-07-17 20:56:13 +00:00			`import collections`
			`import HydrusConstants as HC`
			`import itertools`
			`import os`
			`import threading`
			`import time`
			`import traceback`
Version 151 2015-03-25 22:04:19 +00:00			`import HydrusData`
			`import HydrusExceptions`
			`import re`
Version 255 2017-05-10 21:33:58 +00:00			`import HydrusGlobals as HG`
Version 286 2017-12-13 22:33:07 +00:00			`import HydrusText`
Version 250 2017-04-05 21:16:40 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`def CensorshipMatch( tag, censorships ):`
Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`for censorship in censorships:`
Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`if censorship == '': # '' - all non namespaced tags`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`if namespace == '':`
Version 200 2016-04-06 19:52:45 +00:00
			`return True`


			`elif censorship == ':': # ':' - all namespaced tags`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`if namespace != '':`
Version 200 2016-04-06 19:52:45 +00:00
			`return True`

Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`elif ':' in censorship:`
Version 107 2014-03-12 22:08:23 +00:00
Version 200 2016-04-06 19:52:45 +00:00			`if censorship.endswith( ':' ): # 'series:' - namespaced tags`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

Version 247 2017-03-15 20:13:04 +00:00			`if namespace == censorship[:-1]:`
Version 200 2016-04-06 19:52:45 +00:00
			`return True`


			`else: # 'series:evangelion' - exact match with namespace`

			`if tag == censorship:`

			`return True`



			`else:`

			`# 'table' - normal tag, or namespaced version of same`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`if subtag == censorship:`
Version 200 2016-04-06 19:52:45 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`return True`
Version 200 2016-04-06 19:52:45 +00:00
Version 107 2014-03-12 22:08:23 +00:00


Version 200 2016-04-06 19:52:45 +00:00			`return False`

Version 296 2018-02-28 22:30:36 +00:00			`def CollapseMultipleSortedNumericTagsToMinMax( tags ):`

			`if len( tags ) <= 2:`

			`return tags`

			`else:`

			`includes_non_numeric_tag = True in ( not isinstance( ConvertTagToSortable( tag ), tuple ) for tag in tags )`

			`if includes_non_numeric_tag:`

			`return tags`

			`else:`

			`# this list of tags is entirely numeric and may well be something like 1, 2, 3, 4, 5`
			`# the caller wants to present 1-5 instead, so lets cut out the first and last`

			`if not isinstance( tags, list ):`

			`tags = list( tags )`


			`return [ tags[0], tags[-1] ]`



Version 146 2015-02-03 20:40:21 +00:00			`def ConvertTagToSortable( t ):`
Version 236 2016-12-14 21:19:07 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`if len( t ) > 0 and t[0].isdecimal():`
Version 146 2015-02-03 20:40:21 +00:00
			`# We want to maintain that:`
			`# 0 < 0a < 0b < 1 ( lexicographic comparison )`
			`# -and-`
			`# 2 < 22 ( value comparison )`
			`# So, if the first bit can be turned into an int, split it into ( int, extra )`

			`int_component = ''`

			`i = 0`

			`for character in t:`

Version 284 2017-11-29 21:48:23 +00:00			`if character.isdecimal():`

			`int_component += character`

			`else:`

			`break`

Version 146 2015-02-03 20:40:21 +00:00
			`i += 1`


			`str_component = t[i:]`

Version 236 2016-12-14 21:19:07 +00:00			`number = int( int_component )`

			`return ( number, str_component )`

			`else:`

			`return t`
Version 146 2015-02-03 20:40:21 +00:00

Version 136 2014-11-12 23:33:13 +00:00			`def FilterNamespaces( tags, namespaces ):`

			`processed_tags = collections.defaultdict( set )`

			`for tag in tags:`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`

			`processed_tags[ namespace ].add( tag )`
Version 136 2014-11-12 23:33:13 +00:00

			`result = set()`

			`for namespace in namespaces:`

Version 244 2017-02-08 22:27:00 +00:00			`if namespace == None:`

			`result.update( processed_tags[ '' ] )`

			`else:`

			`result.update( processed_tags[ namespace ] )`

Version 136 2014-11-12 23:33:13 +00:00

			`return result`

Version 203 2016-04-27 19:20:37 +00:00			`def SortNumericTags( tags ):`
Version 145 2015-01-21 22:49:58 +00:00
			`tags = list( tags )`

Version 146 2015-02-03 20:40:21 +00:00			`tags.sort( key = ConvertTagToSortable )`
Version 145 2015-01-21 22:49:58 +00:00
			`return tags`

Version 151 2015-03-25 22:04:19 +00:00			`def CheckTagNotEmpty( tag ):`

Version 244 2017-02-08 22:27:00 +00:00			`( namespace, subtag ) = SplitTag( tag )`
Version 151 2015-03-25 22:04:19 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`if subtag == '':`
Version 151 2015-03-25 22:04:19 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`raise HydrusExceptions.SizeException( 'Received a zero-length tag!' )`
Version 151 2015-03-25 22:04:19 +00:00
Version 250 2017-04-05 21:16:40 +00:00
Version 151 2015-03-25 22:04:19 +00:00			`def CleanTag( tag ):`

Version 165 2015-07-15 20:28:26 +00:00			`try:`

Version 306 2018-05-09 20:23:00 +00:00			`if tag is None:`

			`raise Exception()`


Version 165 2015-07-15 20:28:26 +00:00			`tag = tag[:1024]`

			`tag = tag.lower()`

Version 180 2015-11-04 22:30:28 +00:00			`tag = HydrusData.ToUnicode( tag )`
Version 165 2015-07-15 20:28:26 +00:00
Version 307 2018-05-16 20:09:50 +00:00			`tag = HydrusText.re_leading_single_colon.sub( '::', tag ) # Convert anything starting with one colon to start with two i.e. :D -> ::D`

			`if ':' in tag:`
Version 245 2017-03-02 02:14:56 +00:00
Version 250 2017-04-05 21:16:40 +00:00			`tag = StripTextOfGumpf( tag ) # need to repeat here to catch 'system:' stuff`

Version 245 2017-03-02 02:14:56 +00:00			`( namespace, subtag ) = SplitTag( tag )`

Version 250 2017-04-05 21:16:40 +00:00			`namespace = StripTextOfGumpf( namespace )`
			`subtag = StripTextOfGumpf( subtag )`
Version 245 2017-03-02 02:14:56 +00:00
			`tag = CombineTag( namespace, subtag )`

			`else:`

Version 250 2017-04-05 21:16:40 +00:00			`tag = StripTextOfGumpf( tag )`
Version 165 2015-07-15 20:28:26 +00:00
Version 185 2015-12-09 23:16:41 +00:00
Version 165 2015-07-15 20:28:26 +00:00			`except Exception as e:`

Version 182 2015-11-18 22:44:07 +00:00			`text = 'Was unable to parse the tag: ' + HydrusData.ToUnicode( tag )`
Version 165 2015-07-15 20:28:26 +00:00			`text += os.linesep * 2`
Version 249 2017-03-29 19:39:34 +00:00			`text += HydrusData.ToUnicode( e )`
Version 151 2015-03-25 22:04:19 +00:00
Version 165 2015-07-15 20:28:26 +00:00			`raise Exception( text )`
Version 151 2015-03-25 22:04:19 +00:00

			`return tag`

			`def CleanTags( tags ):`

			`clean_tags = set()`

			`for tag in tags:`

Version 306 2018-05-09 20:23:00 +00:00			`if tag is None:`

			`continue`


Version 151 2015-03-25 22:04:19 +00:00			`tag = CleanTag( tag )`

Version 258 2017-05-31 21:50:53 +00:00			`try:`

			`CheckTagNotEmpty( tag )`

			`except HydrusExceptions.SizeException:`

			`continue`

Version 151 2015-03-25 22:04:19 +00:00
			`clean_tags.add( tag )`


			`return clean_tags`

Version 244 2017-02-08 22:27:00 +00:00			`def CombineTag( namespace, subtag ):`
Version 185 2015-12-09 23:16:41 +00:00
			`if namespace == '':`

Version 307 2018-05-16 20:09:50 +00:00			`if HydrusText.re_leading_single_colon.search( subtag ) is not None:`
Version 185 2015-12-09 23:16:41 +00:00
Version 244 2017-02-08 22:27:00 +00:00			`return ':' + subtag`
Version 185 2015-12-09 23:16:41 +00:00
			`else:`

Version 244 2017-02-08 22:27:00 +00:00			`return subtag`
Version 185 2015-12-09 23:16:41 +00:00

			`else:`

Version 244 2017-02-08 22:27:00 +00:00			`return namespace + ':' + subtag`
Version 185 2015-12-09 23:16:41 +00:00

Version 244 2017-02-08 22:27:00 +00:00			`def SplitTag( tag ):`

			`if ':' in tag:`

			`return tag.split( ':', 1 )`

			`else:`

			`return ( '', tag )`


Version 250 2017-04-05 21:16:40 +00:00			`def StripTextOfGumpf( t ):`

Version 286 2017-12-13 22:33:07 +00:00			`t = HydrusText.re_newlines.sub( '', t )`
Version 250 2017-04-05 21:16:40 +00:00
Version 286 2017-12-13 22:33:07 +00:00			`t = HydrusText.re_multiple_spaces.sub( ' ', t )`
Version 250 2017-04-05 21:16:40 +00:00
Version 286 2017-12-13 22:33:07 +00:00			`t = HydrusText.re_trailing_space.sub( '', t )`
Version 250 2017-04-05 21:16:40 +00:00
Version 286 2017-12-13 22:33:07 +00:00			`t = HydrusText.re_leading_space_or_garbage.sub( '', t )`
Version 250 2017-04-05 21:16:40 +00:00
			`return t`