hydrus/include/HydrusTags.py

import collections
import HydrusConstants as HC
import itertools
import os
import threading
import time
import traceback
import HydrusData
import HydrusExceptions
import re
import HydrusGlobals as HG
import HydrusText

def CensorshipMatch( tag, censorships ):
    
    """
    Return True if tag is covered by at least one of the given censorship rules.
    
    Rule forms:
        ''                  - every tag without a namespace
        ':'                 - every tag with a namespace
        'series:'           - every tag in that namespace
        'series:evangelion' - exactly that tag
        'table'             - that subtag, with or without a namespace
    """
    
    for rule in censorships:
        
        if ':' not in rule:
            
            # either the '' wildcard or a bare subtag like 'table'
            
            ( tag_namespace, tag_subtag ) = SplitTag( tag )
            
            if rule == '':
                
                is_hit = tag_namespace == ''
                
            else:
                
                is_hit = tag_subtag == rule
                
            
        elif rule == ':':
            
            # wildcard for anything that has a namespace
            
            is_hit = SplitTag( tag )[0] != ''
            
        elif rule.endswith( ':' ):
            
            # whole-namespace rule; drop the trailing colon before comparing
            
            is_hit = SplitTag( tag )[0] == rule[:-1]
            
        else:
            
            # fully specified namespace:subtag, so only an exact match counts
            
            is_hit = tag == rule
            
        
        if is_hit:
            
            return True
            
        
    
    return False

def CollapseMultipleSortedNumericTagsToMinMax( tags ):
    
    """
    Reduce an already-sorted, all-numeric run of tags to its two end points.
    
    A collection of two or fewer tags, or one containing any tag that
    ConvertTagToSortable does not turn into a tuple, is returned untouched.
    Otherwise a new two-item list [ first, last ] is returned, so a caller can
    present something like 1, 2, 3, 4, 5 as 1-5.
    """
    
    if len( tags ) <= 2:
        
        return tags
        
    
    # ConvertTagToSortable only gives a tuple for tags that start with a digit
    every_tag_is_numeric = all( isinstance( ConvertTagToSortable( tag ), tuple ) for tag in tags )
    
    if not every_tag_is_numeric:
        
        return tags
        
    
    # need positional access, which sets and other iterables do not offer
    ordered_tags = tags if isinstance( tags, list ) else list( tags )
    
    return [ ordered_tags[0], ordered_tags[-1] ]


def ConvertTagToSortable( t ):
    
    """
    Build a sort key for a tag.
    
    A tag that starts with a decimal digit becomes ( int, remainder ), where int
    is the value of the leading run of digits and remainder is everything after
    it. This keeps both of these orderings:
        0 < 0a < 0b < 1   ( remainder compared as text )
        2 < 22            ( leading number compared by value )
    
    Any other tag, including the empty string, is returned unchanged.
    """
    
    if len( t ) == 0 or not t[0].isdecimal():
        
        return t
        
    
    leading_digits = ''.join( itertools.takewhile( lambda character: character.isdecimal(), t ) )
    
    remainder = t[ len( leading_digits ) : ]
    
    return ( int( leading_digits ), remainder )


def FilterNamespaces( tags, namespaces ):
    
    """
    Return the set of tags whose namespace is one of the requested namespaces.
    
    tags - iterable of tag strings
    namespaces - iterable of namespace strings; None stands for 'no namespace'
    and is treated the same as ''
    
    The result is always a new set, empty when nothing matches.
    """
    
    # None is the caller's way of asking for unnamespaced tags, which SplitTag reports as ''
    wanted_namespaces = { '' if namespace is None else namespace for namespace in namespaces }
    
    return { tag for tag in tags if SplitTag( tag )[0] in wanted_namespaces }

def SortNumericTags( tags ):
    
    """
    Return a new list of the given tags ordered by their ConvertTagToSortable key.
    
    The input collection is not modified.
    """
    
    return sorted( tags, key = ConvertTagToSortable )

def CheckTagNotEmpty( tag ):
    
    """
    Raise HydrusExceptions.SizeException if the subtag part of tag is empty.
    
    Only the subtag is checked, so a tag like 'series:' counts as empty.
    Returns None when the tag is acceptable.
    """
    
    subtag = SplitTag( tag )[1]
    
    subtag_is_blank = subtag == ''
    
    if subtag_is_blank:
        
        raise HydrusExceptions.SizeException( 'Received a zero-length tag!' )
        
    


def CleanTag( tag ):
    
    """
    Normalise a single tag and return the cleaned version.
    
    The tag is cut to its first 1024 characters, lowercased and passed through
    HydrusData.ToUnicode. It is then tidied with StripTextOfGumpf in one of
    three ways, depending on where colons appear:
        starts with ':'  - the leading colon is doubled first ( :D -> ::D )
        contains ':'     - the namespace and subtag are also stripped separately and recombined
        no ':'           - the whole tag is stripped once
    
    Any error raised along the way is re-raised as a plain Exception whose
    message holds the tag and the text of the original error.
    """
    
    try:
        
        # NOTE(review): truncation and lower() run before ToUnicode; if tag can
        # arrive as an encoded byte string, the slice could land inside a
        # multi-byte character - confirm what callers pass in
        tag = tag[:1024]
        
        tag = tag.lower()
        
        tag = HydrusData.ToUnicode( tag )
        
        if tag.startswith( ':' ):
            
            tag = HydrusText.re_leading_single_colon.sub( '::', tag ) # Convert anything starting with one colon to start with two i.e. :D -> ::D
            
            tag = StripTextOfGumpf( tag )
            
        elif ':' in tag:
            
            tag = StripTextOfGumpf( tag ) # need to repeat here to catch 'system:' stuff
            
            ( namespace, subtag ) = SplitTag( tag )
            
            # strip each half on its own, then rebuild the tag from the cleaned halves
            namespace = StripTextOfGumpf( namespace )
            subtag = StripTextOfGumpf( subtag )
            
            tag = CombineTag( namespace, subtag )
            
        else:
            
            tag = StripTextOfGumpf( tag )
            
        
    except Exception as e:
        
        # build a message with the tag, a blank line, then the original error text
        text = 'Was unable to parse the tag: ' + HydrusData.ToUnicode( tag )
        text += os.linesep * 2
        text += HydrusData.ToUnicode( e )
        
        raise Exception( text )
        
    
    return tag

def CleanTags( tags ):
    
    """
    Clean every tag with CleanTag and return the non-empty results as a set.
    
    Tags that CheckTagNotEmpty rejects with a SizeException after cleaning are
    dropped. Any other exception propagates to the caller.
    """
    
    kept = set()
    
    for raw_tag in tags:
        
        cleaned = CleanTag( raw_tag )
        
        try:
            
            CheckTagNotEmpty( cleaned )
            
        except HydrusExceptions.SizeException:
            
            # nothing left after cleaning, so leave it out of the result
            pass
            
        else:
            
            kept.add( cleaned )
            
        
    
    return kept

def CombineTag( namespace, subtag ):
    
    """
    Join a namespace and a subtag into a single tag string.
    
    With a non-empty namespace the result is 'namespace:subtag'. With an empty
    namespace the subtag is returned on its own, except that a subtag starting
    with ':' gets one more ':' in front ( ':d' -> '::d' ).
    """
    
    if namespace != '':
        
        return namespace + ':' + subtag
        
    
    if subtag.startswith( ':' ):
        
        return ':' + subtag
        
    
    return subtag


def SplitTag( tag ):
    
    """
    Split a tag at its first colon and return ( namespace, subtag ).
    
    A tag without a colon has the empty namespace, giving ( '', tag ). Only the
    first colon separates, so 'a:b:c' gives ( 'a', 'b:c' ) and '::d' gives
    ( '', ':d' ).
    
    The result is always a 2-tuple, whether or not the tag has a namespace.
    """
    
    ( namespace, separator, subtag ) = tag.partition( ':' )
    
    if separator == '':
        
        # no colon at all, so partition put the whole tag in the first slot
        return ( '', tag )
        
    
    return ( namespace, subtag )


def StripTextOfGumpf( t ):
    
    """
    Run the text through four HydrusText regex substitutions and return the result.
    
    In order: re_newlines matches are removed, re_multiple_spaces matches become
    a single space, then re_trailing_space and re_leading_space_or_garbage
    matches are removed.
    """
    
    # order matters: each substitution works on the output of the one before it
    substitutions = (
        ( HydrusText.re_newlines, '' ),
        ( HydrusText.re_multiple_spaces, ' ' ),
        ( HydrusText.re_trailing_space, '' ),
        ( HydrusText.re_leading_space_or_garbage, '' )
    )
    
    for ( pattern, replacement ) in substitutions:
        
        t = pattern.sub( replacement, t )
        
    
    return t