# hydrus/hydrus/client/db/ClientDBFilesSearch.py

import random
import sqlite3
import typing
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusDB
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusTags
from hydrus.core import HydrusTime
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientLocation
from hydrus.client import ClientThreading
from hydrus.client.db import ClientDBDefinitionsCache
from hydrus.client.db import ClientDBFilesDuplicates
from hydrus.client.db import ClientDBFilesInbox
from hydrus.client.db import ClientDBFilesMetadataBasic
from hydrus.client.db import ClientDBFilesStorage
from hydrus.client.db import ClientDBFilesTimestamps
from hydrus.client.db import ClientDBFilesViewingStats
from hydrus.client.db import ClientDBMappingsCounts
from hydrus.client.db import ClientDBMappingsStorage
from hydrus.client.db import ClientDBMaster
from hydrus.client.db import ClientDBModule
from hydrus.client.db import ClientDBNotesMap
from hydrus.client.db import ClientDBServices
from hydrus.client.db import ClientDBSimilarFiles
from hydrus.client.db import ClientDBTagSearch
from hydrus.client.db import ClientDBURLMap
from hydrus.client.media import ClientMedia
from hydrus.client.metadata import ClientTags
from hydrus.client.search import ClientSearch
def intersection_update_qhi( query_hash_ids: typing.Optional[ typing.Set[ int ] ], some_hash_ids: typing.Collection[ int ], force_create_new_set = False ) -> typing.Set[ int ]:
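    """
    Intersect a new batch of hash_ids into the running query result. If query_hash_ids is
    still None (no predicate has produced results yet), the batch itself (as a set) becomes
    the working set; otherwise the working set is intersected in place and returned.
    """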
    if query_hash_ids is None:
        if not isinstance( some_hash_ids, set ) or force_create_new_set:
            some_hash_ids = set( some_hash_ids )
        return some_hash_ids
    else:
        query_hash_ids.intersection_update( some_hash_ids )
        return query_hash_ids

def GetFilesInfoPredicates( system_predicates: ClientSearch.FileSystemPredicates ):
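    """
    Convert the simple system predicates (size, mime, resolution, ratio, duration, framerate,
    num_words, num_frames) into SQL WHERE fragments against the files_info table.
    """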
    simple_preds = system_predicates.GetSimpleInfo()
    files_info_predicates = []
    if 'min_size' in simple_preds:
        files_info_predicates.append( 'size > ' + str( simple_preds[ 'min_size' ] ) )
    if 'size' in simple_preds:
        files_info_predicates.append( 'size = ' + str( simple_preds[ 'size' ] ) )
    if 'not_size' in simple_preds:
        files_info_predicates.append( 'size != ' + str( simple_preds[ 'not_size' ] ) )
    if 'max_size' in simple_preds:
        files_info_predicates.append( 'size < ' + str( simple_preds[ 'max_size' ] ) )
    if 'mimes' in simple_preds:
        mimes = simple_preds[ 'mimes' ]
        if len( mimes ) == 1:
            ( mime, ) = mimes
            files_info_predicates.append( 'mime = ' + str( mime ) )
        else:
            files_info_predicates.append( 'mime IN ' + HydrusData.SplayListForDB( mimes ) )
    if 'has_audio' in simple_preds:
        has_audio = simple_preds[ 'has_audio' ]
        files_info_predicates.append( 'has_audio = {}'.format( int( has_audio ) ) )
    if 'min_width' in simple_preds:
        files_info_predicates.append( 'width > ' + str( simple_preds[ 'min_width' ] ) )
    if 'width' in simple_preds:
        files_info_predicates.append( 'width = ' + str( simple_preds[ 'width' ] ) )
    if 'not_width' in simple_preds:
        files_info_predicates.append( 'width != ' + str( simple_preds[ 'not_width' ] ) )
    if 'max_width' in simple_preds:
        files_info_predicates.append( 'width < ' + str( simple_preds[ 'max_width' ] ) )
    if 'min_height' in simple_preds:
        files_info_predicates.append( 'height > ' + str( simple_preds[ 'min_height' ] ) )
    if 'height' in simple_preds:
        files_info_predicates.append( 'height = ' + str( simple_preds[ 'height' ] ) )
    if 'not_height' in simple_preds:
        files_info_predicates.append( 'height != ' + str( simple_preds[ 'not_height' ] ) )
    if 'max_height' in simple_preds:
        files_info_predicates.append( 'height < ' + str( simple_preds[ 'max_height' ] ) )
    if 'min_num_pixels' in simple_preds:
        files_info_predicates.append( 'width * height > ' + str( simple_preds[ 'min_num_pixels' ] ) )
    if 'num_pixels' in simple_preds:
        files_info_predicates.append( 'width * height = ' + str( simple_preds[ 'num_pixels' ] ) )
    if 'not_num_pixels' in simple_preds:
        files_info_predicates.append( 'width * height != ' + str( simple_preds[ 'not_num_pixels' ] ) )
    if 'max_num_pixels' in simple_preds:
        files_info_predicates.append( 'width * height < ' + str( simple_preds[ 'max_num_pixels' ] ) )
    if 'min_ratio' in simple_preds:
        ( ratio_width, ratio_height ) = simple_preds[ 'min_ratio' ]
        files_info_predicates.append( '( width * 1.0 ) / height > ' + str( float( ratio_width ) ) + ' / ' + str( ratio_height ) )
    if 'ratio' in simple_preds:
        ( ratio_width, ratio_height ) = simple_preds[ 'ratio' ]
        files_info_predicates.append( '( width * 1.0 ) / height = ' + str( float( ratio_width ) ) + ' / ' + str( ratio_height ) )
    if 'not_ratio' in simple_preds:
        ( ratio_width, ratio_height ) = simple_preds[ 'not_ratio' ]
        files_info_predicates.append( '( width * 1.0 ) / height != ' + str( float( ratio_width ) ) + ' / ' + str( ratio_height ) )
    if 'max_ratio' in simple_preds:
        ( ratio_width, ratio_height ) = simple_preds[ 'max_ratio' ]
        files_info_predicates.append( '( width * 1.0 ) / height < ' + str( float( ratio_width ) ) + ' / ' + str( ratio_height ) )
    if 'min_num_words' in simple_preds:
        files_info_predicates.append( 'num_words > ' + str( simple_preds[ 'min_num_words' ] ) )
    if 'num_words' in simple_preds:
        num_words = simple_preds[ 'num_words' ]
        if num_words == 0:
            files_info_predicates.append( '( num_words IS NULL OR num_words = 0 )' )
        else:
            files_info_predicates.append( 'num_words = ' + str( num_words ) )
    if 'not_num_words' in simple_preds:
        num_words = simple_preds[ 'not_num_words' ]
        files_info_predicates.append( '( num_words IS NULL OR num_words != {} )'.format( num_words ) )
    if 'max_num_words' in simple_preds:
        max_num_words = simple_preds[ 'max_num_words' ]
        if max_num_words == 0:
            files_info_predicates.append( 'num_words < ' + str( max_num_words ) )
        else:
            files_info_predicates.append( '( num_words < ' + str( max_num_words ) + ' OR num_words IS NULL )' )
    if 'min_duration' in simple_preds:
        files_info_predicates.append( 'duration > ' + str( simple_preds[ 'min_duration' ] ) )
    if 'duration' in simple_preds:
        duration = simple_preds[ 'duration' ]
        if duration == 0:
            files_info_predicates.append( '( duration = 0 OR duration IS NULL )' )
        else:
            files_info_predicates.append( 'duration = ' + str( duration ) )
    if 'not_duration' in simple_preds:
        duration = simple_preds[ 'not_duration' ]
        files_info_predicates.append( '( duration IS NULL OR duration != {} )'.format( duration ) )
    if 'max_duration' in simple_preds:
        max_duration = simple_preds[ 'max_duration' ]
        if max_duration == 0:
            files_info_predicates.append( 'duration < ' + str( max_duration ) )
        else:
            files_info_predicates.append( '( duration < ' + str( max_duration ) + ' OR duration IS NULL )' )
    if 'min_framerate' in simple_preds or 'framerate' in simple_preds or 'max_framerate' in simple_preds or 'not_framerate' in simple_preds:
        if 'not_framerate' in simple_preds:
            pred = '( duration IS NULL OR num_frames = 0 OR ( duration IS NOT NULL AND duration != 0 AND num_frames != 0 AND num_frames IS NOT NULL AND {} ) )'
            min_framerate_sql = simple_preds[ 'not_framerate' ] * 0.95
            max_framerate_sql = simple_preds[ 'not_framerate' ] * 1.05
            pred = pred.format( '( num_frames * 1.0 ) / ( duration / 1000.0 ) NOT BETWEEN {} AND {}'.format( min_framerate_sql, max_framerate_sql ) )
        else:
            min_framerate_sql = None
            max_framerate_sql = None
            pred = '( duration IS NOT NULL AND duration != 0 AND num_frames != 0 AND num_frames IS NOT NULL AND {} )'
            if 'min_framerate' in simple_preds:
                min_framerate_sql = simple_preds[ 'min_framerate' ] * 1.05
            if 'framerate' in simple_preds:
                min_framerate_sql = simple_preds[ 'framerate' ] * 0.95
                max_framerate_sql = simple_preds[ 'framerate' ] * 1.05
            if 'max_framerate' in simple_preds:
                max_framerate_sql = simple_preds[ 'max_framerate' ] * 0.95
            if min_framerate_sql is None:
                pred = pred.format( '( num_frames * 1.0 ) / ( duration / 1000.0 ) < {}'.format( max_framerate_sql ) )
            elif max_framerate_sql is None:
                pred = pred.format( '( num_frames * 1.0 ) / ( duration / 1000.0 ) > {}'.format( min_framerate_sql ) )
            else:
                pred = pred.format( '( num_frames * 1.0 ) / ( duration / 1000.0 ) BETWEEN {} AND {}'.format( min_framerate_sql, max_framerate_sql ) )
        files_info_predicates.append( pred )
    if 'min_num_frames' in simple_preds:
        files_info_predicates.append( 'num_frames > ' + str( simple_preds[ 'min_num_frames' ] ) )
    if 'num_frames' in simple_preds:
        num_frames = simple_preds[ 'num_frames' ]
        if num_frames == 0:
            files_info_predicates.append( '( num_frames IS NULL OR num_frames = 0 )' )
        else:
            files_info_predicates.append( 'num_frames = ' + str( num_frames ) )
    if 'not_num_frames' in simple_preds:
        num_frames = simple_preds[ 'not_num_frames' ]
        files_info_predicates.append( '( num_frames IS NULL OR num_frames != {} )'.format( num_frames ) )
    if 'max_num_frames' in simple_preds:
        max_num_frames = simple_preds[ 'max_num_frames' ]
        if max_num_frames == 0:
            files_info_predicates.append( 'num_frames < ' + str( max_num_frames ) )
        else:
            files_info_predicates.append( '( num_frames < ' + str( max_num_frames ) + ' OR num_frames IS NULL )' )
    return files_info_predicates

class ClientDBFilesSearchTags( ClientDBModule.ClientDBModule ):
    def __init__(
        self,
        cursor: sqlite3.Cursor,
        modules_services: ClientDBServices.ClientDBMasterServices,
        modules_tags: ClientDBMaster.ClientDBMasterTags,
        modules_files_storage: ClientDBFilesStorage,
        modules_mappings_counts: ClientDBMappingsCounts.ClientDBMappingsCounts,
        modules_tag_search: ClientDBTagSearch.ClientDBTagSearch
    ):
        self.modules_services = modules_services
        self.modules_tags = modules_tags
        self.modules_files_storage = modules_files_storage
        self.modules_mappings_counts = modules_mappings_counts
        self.modules_tag_search = modules_tag_search
        ClientDBModule.ClientDBModule.__init__( self, 'client file search using tags', cursor )

    def GetHashIdsAndNonZeroTagCounts( self, tag_display_type: int, location_context: ClientLocation.LocationContext, tag_context: ClientSearch.TagContext, hash_ids, namespace_wildcard = '*', job_key = None ):
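        """
        For the given hash_ids, return ( hash_id, tag count ) rows for files that have at least
        one tag in the given domain, optionally filtered to a namespace wildcard. Works in
        square-root-sized blocks of hash_ids to keep the UNION queries cancellable.
        """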
        if namespace_wildcard == '*':
            namespace_ids = []
        else:
            namespace_ids = self.modules_tag_search.GetNamespaceIdsFromWildcard( namespace_wildcard )

        with self._MakeTemporaryIntegerTable( namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
            ( file_service_keys, file_location_is_cross_referenced ) = location_context.GetCoveringCurrentFileServiceKeys()
            mapping_and_tag_table_names = set()
            for file_service_key in file_service_keys:
                mapping_and_tag_table_names.update( self.modules_tag_search.GetMappingAndTagTables( tag_display_type, file_service_key, tag_context ) )

            # reason why I (JOIN each table) rather than (join the UNION) is based on previous hell with having query planner figure out a "( a UNION b UNION c ) NATURAL JOIN stuff" situation
            # although the following sometimes makes certifiable 2KB ( 6 UNION * 4-table ) queries, it actually works fast
            # OK, a new problem is mass UNION leads to terrible cancelability because the first row cannot be fetched until the first n - 1 union queries are done
            # I tried some gubbins to try to do a pseudo table-union rather than query union and do 'get files->distinct tag count for this union of tables, and fetch hash_ids first on the union', but did not have luck
            # so NOW we are just going to do it in bits of files mate. this also reduces memory use from the distinct-making UNION with large numbers of hash_ids

            results = []
            BLOCK_SIZE = max( 64, int( len( hash_ids ) ** 0.5 ) ) # go for square root for now
            cancelled_hook = None
            if job_key is not None:
                cancelled_hook = job_key.IsCancelled

            for group_of_hash_ids in HydrusData.SplitIteratorIntoChunks( hash_ids, BLOCK_SIZE ):
                with self._MakeTemporaryIntegerTable( group_of_hash_ids, 'hash_id' ) as hash_ids_table_name:
                    if namespace_wildcard == '*':
                        # temp hashes to mappings
                        select_statements = [ 'SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id )'.format( hash_ids_table_name, mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
                    else:
                        # temp hashes to mappings to tags to namespaces
                        select_statements = [ 'SELECT hash_id, tag_id FROM {} CROSS JOIN {} USING ( hash_id ) CROSS JOIN {} USING ( tag_id ) CROSS JOIN {} USING ( namespace_id )'.format( hash_ids_table_name, mappings_table_name, tags_table_name, temp_namespace_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]

                    unions = '( {} )'.format( ' UNION '.join( select_statements ) )
                    query = 'SELECT hash_id, COUNT( tag_id ) FROM {} GROUP BY hash_id;'.format( unions )
                    loop_of_results = self._ExecuteCancellable( query, (), cancelled_hook )
                    if job_key is not None and job_key.IsCancelled():
                        return results
                    results.extend( loop_of_results )

        return results

    def GetHashIdsFromNamespaceIdsSubtagIds( self, tag_display_type: int, file_service_key, tag_context: ClientSearch.TagContext, namespace_ids, subtag_ids, hash_ids = None, hash_ids_table_name = None, job_key = None ):
        file_service_id = self.modules_services.GetServiceId( file_service_key )
        tag_service_id = self.modules_services.GetServiceId( tag_context.service_key )
        tag_ids = self.modules_tag_search.GetTagIdsFromNamespaceIdsSubtagIds( file_service_id, tag_service_id, namespace_ids, subtag_ids, job_key = job_key )
        return self.GetHashIdsFromTagIds( tag_display_type, file_service_key, tag_context, tag_ids, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )

    def GetHashIdsFromNamespaceIdsSubtagIdsTables( self, tag_display_type: int, file_service_key, tag_context: ClientSearch.TagContext, namespace_ids_table_name, subtag_ids_table_name, hash_ids = None, hash_ids_table_name = None, job_key = None ):
        file_service_id = self.modules_services.GetServiceId( file_service_key )
        tag_service_id = self.modules_services.GetServiceId( tag_context.service_key )
        tag_ids = self.modules_tag_search.GetTagIdsFromNamespaceIdsSubtagIdsTables( file_service_id, tag_service_id, namespace_ids_table_name, subtag_ids_table_name, job_key = job_key )
        return self.GetHashIdsFromTagIds( tag_display_type, file_service_key, tag_context, tag_ids, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )

    def GetHashIdsFromSubtagIds( self, tag_display_type: int, file_service_key, tag_context: ClientSearch.TagContext, subtag_ids, hash_ids = None, hash_ids_table_name = None, job_key = None ):
        file_service_id = self.modules_services.GetServiceId( file_service_key )
        tag_service_id = self.modules_services.GetServiceId( tag_context.service_key )
        tag_ids = self.modules_tag_search.GetTagIdsFromSubtagIds( file_service_id, tag_service_id, subtag_ids, job_key = job_key )
        return self.GetHashIdsFromTagIds( tag_display_type, file_service_key, tag_context, tag_ids, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )

    def GetHashIdsFromSubtagIdsTable( self, tag_display_type: int, file_service_key, tag_context: ClientSearch.TagContext, subtag_ids_table_name, hash_ids = None, hash_ids_table_name = None, job_key = None ):
        file_service_id = self.modules_services.GetServiceId( file_service_key )
        tag_service_id = self.modules_services.GetServiceId( tag_context.service_key )
        tag_ids = self.modules_tag_search.GetTagIdsFromSubtagIdsTable( file_service_id, tag_service_id, subtag_ids_table_name, job_key = job_key )
        return self.GetHashIdsFromTagIds( tag_display_type, file_service_key, tag_context, tag_ids, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )

    def GetHashIdsFromTag( self, tag_display_type: int, location_context: ClientLocation.LocationContext, tag_context: ClientSearch.TagContext, tag, hash_ids = None, hash_ids_table_name = None, job_key = None ):
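        """
        Return all hash_ids that have the given tag (via its ideal sibling) in the given
        location and tag contexts, optionally constrained to the provided hash_ids.
        """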
        ( file_service_keys, file_location_is_cross_referenced ) = location_context.GetCoveringCurrentFileServiceKeys()
        if not file_location_is_cross_referenced and hash_ids_table_name is not None:
            file_location_is_cross_referenced = True
        if not self.modules_tags.TagExists( tag ):
            return set()

        results = set()
        if tag_context.service_key == CC.COMBINED_TAG_SERVICE_KEY:
            search_tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
        else:
            search_tag_service_ids = ( self.modules_services.GetServiceId( tag_context.service_key ), )
        service_ids_to_service_keys = self.modules_services.GetServiceIdsToServiceKeys()
        ( namespace, subtag ) = HydrusTags.SplitTag( tag )
        tag_id = self.modules_tags.GetTagId( tag )

        for search_tag_service_id in search_tag_service_ids:
            search_tag_service_key = service_ids_to_service_keys[ search_tag_service_id ]
            search_tag_context = ClientSearch.TagContext( service_key = search_tag_service_key, include_current_tags = tag_context.include_current_tags, include_pending_tags = tag_context.include_pending_tags, display_service_key = search_tag_service_key )
            ideal_tag_id = self.modules_tag_search.modules_tag_siblings.GetIdealTagId( tag_display_type, search_tag_service_id, tag_id )
            for file_service_key in file_service_keys:
                # just as a legacy note, this is where we used to do the "'samus aran' gets 'character:samus aran'" code. now, that stuff works through wildcards if user explicitly enters '*:samus aran'
                tag_ids = ( ideal_tag_id, )
                some_results = self.GetHashIdsFromTagIds( tag_display_type, file_service_key, search_tag_context, tag_ids, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )
                if len( results ) == 0:
                    results = some_results
                else:
                    results.update( some_results )

        if not file_location_is_cross_referenced:
            results = self.modules_files_storage.FilterHashIds( location_context, results )
        return results

    def GetHashIdsFromTagIds( self, tag_display_type: int, file_service_key: bytes, tag_context: ClientSearch.TagContext, tag_ids: typing.Collection[ int ], hash_ids = None, hash_ids_table_name = None, job_key = None ):
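        """
        Return hash_ids that have any of the given tag_ids. If a table of candidate hash_ids
        is supplied and the mappings count estimate says it is cheaper, the search joins
        against that table instead of scanning the mappings tables directly.
        """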
        do_hash_table_join = False
        if hash_ids_table_name is not None and hash_ids is not None:
            tag_service_id = self.modules_services.GetServiceId( tag_context.service_key )
            file_service_id = self.modules_services.GetServiceId( file_service_key )
            estimated_count = self.modules_mappings_counts.GetAutocompleteCountEstimate( tag_display_type, tag_service_id, file_service_id, tag_ids, tag_context.include_current_tags, tag_context.include_pending_tags )
            if ClientDBMappingsStorage.DoingAFileJoinTagSearchIsFaster( len( hash_ids ), estimated_count ):
                do_hash_table_join = True

        result_hash_ids = set()
        table_names = self.modules_tag_search.GetMappingTables( tag_display_type, file_service_key, tag_context )
        cancelled_hook = None
        if job_key is not None:
            cancelled_hook = job_key.IsCancelled

        if len( tag_ids ) == 1:
            ( tag_id, ) = tag_ids
            if do_hash_table_join:
                # temp hashes to mappings
                queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) WHERE tag_id = ?'.format( hash_ids_table_name, table_name ) for table_name in table_names ]
            else:
                queries = [ 'SELECT hash_id FROM {} WHERE tag_id = ?;'.format( table_name ) for table_name in table_names ]
            for query in queries:
                result_hash_ids.update( self._STI( self._ExecuteCancellable( query, ( tag_id, ), cancelled_hook ) ) )
        else:
            with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
                if do_hash_table_join:
                    # temp hashes to mappings to temp tags
                    # old method, does not do EXISTS efficiently, it makes a list instead and checks that
                    # queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} CROSS JOIN {} USING ( tag_id ) WHERE {}.hash_id = {}.hash_id );'.format( hash_ids_table_name, table_name, temp_tag_ids_table_name, table_name, hash_ids_table_name ) for table_name in table_names ]
                    # new method, this seems to actually do the correlated scalar subquery, although it does seem to be sqlite voodoo
                    queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id AND EXISTS ( SELECT 1 FROM {} WHERE {}.tag_id = {}.tag_id ) );'.format( hash_ids_table_name, table_name, table_name, hash_ids_table_name, temp_tag_ids_table_name, table_name, temp_tag_ids_table_name ) for table_name in table_names ]
                else:
                    # temp tags to mappings
                    queries = [ 'SELECT hash_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_ids_table_name, table_name ) for table_name in table_names ]
                for query in queries:
                    result_hash_ids.update( self._STI( self._ExecuteCancellable( query, (), cancelled_hook ) ) )

        return result_hash_ids

    def GetHashIdsFromWildcardComplexLocation( self, tag_display_type: int, location_context: ClientLocation.LocationContext, tag_context: ClientSearch.TagContext, wildcard, hash_ids = None, hash_ids_table_name = None, job_key = None ):
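        """
        Resolve a tag wildcard (namespace and subtag parts) to hash_ids across every file
        service covered by the location context, cross-referencing the location afterwards
        if the per-service searches could not do it themselves.
        """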
        ( namespace_wildcard, subtag_wildcard ) = HydrusTags.SplitTag( wildcard )
        if subtag_wildcard == '*':
            return self.GetHashIdsThatHaveTagsComplexLocation( tag_display_type, location_context, tag_context, namespace_wildcard = namespace_wildcard, hash_ids_table_name = hash_ids_table_name, job_key = job_key )

        results = set()
        ( file_service_keys, file_location_is_cross_referenced ) = location_context.GetCoveringCurrentFileServiceKeys()
        if not file_location_is_cross_referenced and hash_ids_table_name is not None:
            file_location_is_cross_referenced = True
        if namespace_wildcard == '*':
            possible_namespace_ids = []
        else:
            possible_namespace_ids = self.modules_tag_search.GetNamespaceIdsFromWildcard( namespace_wildcard )
            if len( possible_namespace_ids ) == 0:
                return set()

        with self._MakeTemporaryIntegerTable( possible_namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
            if namespace_wildcard == '*':
                namespace_ids_table_name = None
            else:
                namespace_ids_table_name = temp_namespace_ids_table_name
            for file_service_key in file_service_keys:
                some_results = self.GetHashIdsFromWildcardSimpleLocation( tag_display_type, file_service_key, tag_context, subtag_wildcard, namespace_ids_table_name = namespace_ids_table_name, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )
                if len( results ) == 0:
                    results = some_results
                else:
                    results.update( some_results )

        if not file_location_is_cross_referenced:
            results = self.modules_files_storage.FilterHashIds( location_context, results )
        return results

    def GetHashIdsFromWildcardSimpleLocation( self, tag_display_type: int, file_service_key: bytes, tag_context: ClientSearch.TagContext, subtag_wildcard, namespace_ids_table_name = None, hash_ids = None, hash_ids_table_name = None, job_key = None ):
        with self._MakeTemporaryIntegerTable( [], 'subtag_id' ) as temp_subtag_ids_table_name:
            file_service_id = self.modules_services.GetServiceId( file_service_key )
            tag_service_id = self.modules_services.GetServiceId( tag_context.service_key )
            self.modules_tag_search.GetSubtagIdsFromWildcardIntoTable( file_service_id, tag_service_id, subtag_wildcard, temp_subtag_ids_table_name, job_key = job_key )
            if namespace_ids_table_name is None:
                return self.GetHashIdsFromSubtagIdsTable( tag_display_type, file_service_key, tag_context, temp_subtag_ids_table_name, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )
            else:
                return self.GetHashIdsFromNamespaceIdsSubtagIdsTables( tag_display_type, file_service_key, tag_context, namespace_ids_table_name, temp_subtag_ids_table_name, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )

    def GetHashIdsThatHaveTagAsNumComplexLocation( self, tag_display_type: int, location_context: ClientLocation.LocationContext, tag_context: ClientSearch.TagContext, namespace_wildcard, num, operator, hash_ids = None, hash_ids_table_name = None, job_key = None ):
        if location_context.IsEmpty():
            return set()

        ( file_service_keys, file_location_is_cross_referenced ) = location_context.GetCoveringCurrentFileServiceKeys()
        if not file_location_is_cross_referenced and hash_ids_table_name is not None:
            file_location_is_cross_referenced = True

        results = set()
        for file_service_key in file_service_keys:
            some_results = self.GetHashIdsThatHaveTagAsNumSimpleLocation( tag_display_type, file_service_key, tag_context, namespace_wildcard, num, operator, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )
            if len( results ) == 0:
                results = some_results
            else:
                results.update( some_results )

        if not file_location_is_cross_referenced:
            results = self.modules_files_storage.FilterHashIds( location_context, results )
        return results

    def GetHashIdsThatHaveTagAsNumSimpleLocation( self, tag_display_type: int, file_service_key: bytes, tag_context: ClientSearch.TagContext, namespace_wildcard, num, operator, hash_ids = None, hash_ids_table_name = None, job_key = None ):
        file_service_id = self.modules_services.GetServiceId( file_service_key )
        tag_service_id = self.modules_services.GetServiceId( tag_context.service_key )
        if tag_service_id == self.modules_services.combined_tag_service_id:
            search_tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
        else:
            search_tag_service_ids = ( tag_service_id, )

        possible_subtag_ids = set()
        for search_tag_service_id in search_tag_service_ids:
            some_possible_subtag_ids = self.modules_tag_search.GetTagAsNumSubtagIds( file_service_id, search_tag_service_id, operator, num )
            possible_subtag_ids.update( some_possible_subtag_ids )

        if namespace_wildcard == '*':
            return self.GetHashIdsFromSubtagIds( tag_display_type, file_service_key, tag_context, possible_subtag_ids, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )
        else:
            possible_namespace_ids = self.modules_tag_search.GetNamespaceIdsFromWildcard( namespace_wildcard )
            return self.GetHashIdsFromNamespaceIdsSubtagIds( tag_display_type, file_service_key, tag_context, possible_namespace_ids, possible_subtag_ids, hash_ids = hash_ids, hash_ids_table_name = hash_ids_table_name, job_key = job_key )

    def GetHashIdsThatHaveTagsComplexLocation( self, tag_display_type: int, location_context: ClientLocation.LocationContext, tag_context: ClientSearch.TagContext, namespace_wildcard = '*', hash_ids_table_name = None, job_key = None ):
        if location_context.IsEmpty():
            return set()

        if namespace_wildcard == '*':
            possible_namespace_ids = []
        else:
            possible_namespace_ids = self.modules_tag_search.GetNamespaceIdsFromWildcard( namespace_wildcard )
            if len( possible_namespace_ids ) == 0:
                return set()

        results = set()
        with self._MakeTemporaryIntegerTable( possible_namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
            if namespace_wildcard == '*':
                namespace_ids_table_name = None
            else:
                namespace_ids_table_name = temp_namespace_ids_table_name
            ( file_service_keys, file_location_is_cross_referenced ) = location_context.GetCoveringCurrentFileServiceKeys()
            if not file_location_is_cross_referenced and hash_ids_table_name is not None:
                file_location_is_cross_referenced = True
            for file_service_key in file_service_keys:
                some_results = self.GetHashIdsThatHaveTagsSimpleLocation( tag_display_type, file_service_key, tag_context, namespace_ids_table_name = namespace_ids_table_name, hash_ids_table_name = hash_ids_table_name, job_key = job_key )
                if len( results ) == 0:
                    results = some_results
                else:
                    results.update( some_results )

        if not file_location_is_cross_referenced:
            results = self.modules_files_storage.FilterHashIds( location_context, results )
        return results

    def GetHashIdsThatHaveTagsSimpleLocation( self, tag_display_type: int, file_service_key: bytes, tag_context: ClientSearch.TagContext, namespace_ids_table_name = None, hash_ids_table_name = None, job_key = None ):
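        """
        Return hash_ids that have at least one tag in the given single file service, optionally
        restricted to a namespace table and/or a table of candidate hash_ids.
        """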
        mapping_and_tag_table_names = self.modules_tag_search.GetMappingAndTagTables( tag_display_type, file_service_key, tag_context )
        if hash_ids_table_name is None:
            if namespace_ids_table_name is None:
                # hellmode
                queries = [ 'SELECT DISTINCT hash_id FROM {};'.format( mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
            else:
                # temp namespaces to tags to mappings
                queries = [ 'SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( namespace_id ) CROSS JOIN {} USING ( tag_id );'.format( namespace_ids_table_name, tags_table_name, mappings_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
        else:
            if namespace_ids_table_name is None:
                queries = [ 'SELECT hash_id FROM {} WHERE EXISTS ( SELECT 1 FROM {} WHERE {}.hash_id = {}.hash_id );'.format( hash_ids_table_name, mappings_table_name, mappings_table_name, hash_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]
            else:
                # temp hashes to mappings to tags to temp namespaces
                # this was originally a 'WHERE EXISTS' thing, but doing that on a three way cross join is too complex for that to work well
                # let's hope DISTINCT can save time too
                queries = [ 'SELECT DISTINCT hash_id FROM {} CROSS JOIN {} USING ( hash_id ) CROSS JOIN {} USING ( tag_id ) CROSS JOIN {} USING ( namespace_id );'.format( hash_ids_table_name, mappings_table_name, tags_table_name, namespace_ids_table_name ) for ( mappings_table_name, tags_table_name ) in mapping_and_tag_table_names ]

        cancelled_hook = None
        if job_key is not None:
            cancelled_hook = job_key.IsCancelled

        nonzero_tag_hash_ids = set()
        for query in queries:
            nonzero_tag_hash_ids.update( self._STI( self._ExecuteCancellable( query, (), cancelled_hook ) ) )
            if job_key is not None and job_key.IsCancelled():
                return set()

        return nonzero_tag_hash_ids

    def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
        tables_and_columns = []
        return tables_and_columns

class ClientDBFilesQuery( ClientDBModule.ClientDBModule ):
    def __init__(
        self,
        cursor: sqlite3.Cursor,
        modules_services: ClientDBServices.ClientDBMasterServices,
        modules_hashes: ClientDBMaster.ClientDBMasterHashes,
        modules_tags: ClientDBMaster.ClientDBMasterTags,
        modules_files_metadata_basic: ClientDBFilesMetadataBasic.ClientDBFilesMetadataBasic,
        modules_files_timestamps: ClientDBFilesTimestamps.ClientDBFilesTimestamps,
        modules_files_viewing_stats: ClientDBFilesViewingStats.ClientDBFilesViewingStats,
        modules_url_map: ClientDBURLMap.ClientDBURLMap,
        modules_notes_map: ClientDBNotesMap.ClientDBNotesMap,
        modules_files_storage: ClientDBFilesStorage,
        modules_files_inbox: ClientDBFilesInbox.ClientDBFilesInbox,
        modules_mappings_counts: ClientDBMappingsCounts.ClientDBMappingsCounts,
        modules_hashes_local_cache: ClientDBDefinitionsCache.ClientDBCacheLocalHashes,
        modules_tag_search: ClientDBTagSearch.ClientDBTagSearch,
        modules_similar_files: ClientDBSimilarFiles.ClientDBSimilarFiles,
        modules_files_duplicates: ClientDBFilesDuplicates.ClientDBFilesDuplicates,
        modules_files_search_tags: ClientDBFilesSearchTags
    ):
        # this is obviously a monster, so the solution is going to be to merge the sub-modules into 'search' modules like the 'tags' one above. this guy doesn't have to do search, it can farm that work out
        self.modules_services = modules_services
        self.modules_hashes = modules_hashes
        self.modules_tags = modules_tags
        self.modules_files_metadata_basic = modules_files_metadata_basic
        self.modules_files_timestamps = modules_files_timestamps
        self.modules_files_viewing_stats = modules_files_viewing_stats
        self.modules_url_map = modules_url_map
        self.modules_notes_map = modules_notes_map
        self.modules_files_storage = modules_files_storage
        self.modules_files_inbox = modules_files_inbox
        self.modules_mappings_counts = modules_mappings_counts
        self.modules_hashes_local_cache = modules_hashes_local_cache
        self.modules_tag_search = modules_tag_search
        self.modules_similar_files = modules_similar_files
        self.modules_files_duplicates = modules_files_duplicates
        self.modules_files_search_tags = modules_files_search_tags
        ClientDBModule.ClientDBModule.__init__( self, 'client file query', cursor )

    def _DoNotePreds( self, system_predicates: ClientSearch.FileSystemPredicates, query_hash_ids: typing.Optional[ typing.Set[ int ] ], job_key: typing.Optional[ ClientThreading.JobKey ] = None ) -> typing.Optional[ typing.Set[ int ] ]:
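        """
        Apply the note-count and note-name system predicates to query_hash_ids, intersecting
        or subtracting as appropriate.
        """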
        simple_preds = system_predicates.GetSimpleInfo()
        min_num_notes = None
        max_num_notes = None
        if 'num_notes' in simple_preds:
            min_num_notes = simple_preds[ 'num_notes' ]
            max_num_notes = min_num_notes
        else:
            if 'min_num_notes' in simple_preds:
                min_num_notes = simple_preds[ 'min_num_notes' ] + 1
            if 'max_num_notes' in simple_preds:
                max_num_notes = simple_preds[ 'max_num_notes' ] - 1

        if min_num_notes is not None or max_num_notes is not None:
            with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
                self._AnalyzeTempTable( temp_table_name )
                num_notes_hash_ids = self.modules_notes_map.GetHashIdsFromNumNotes( min_num_notes, max_num_notes, temp_table_name, job_key = job_key )
                query_hash_ids = intersection_update_qhi( query_hash_ids, num_notes_hash_ids )

        if 'has_note_names' in simple_preds:
            inclusive_note_names = simple_preds[ 'has_note_names' ]
            for note_name in inclusive_note_names:
                with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
                    self._AnalyzeTempTable( temp_table_name )
                    notes_hash_ids = self.modules_notes_map.GetHashIdsFromNoteName( note_name, temp_table_name, job_key = job_key )
                    query_hash_ids = intersection_update_qhi( query_hash_ids, notes_hash_ids )

        if 'not_has_note_names' in simple_preds:
            exclusive_note_names = simple_preds[ 'not_has_note_names' ]
            for note_name in exclusive_note_names:
                with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
                    self._AnalyzeTempTable( temp_table_name )
                    notes_hash_ids = self.modules_notes_map.GetHashIdsFromNoteName( note_name, temp_table_name, job_key = job_key )
                    query_hash_ids.difference_update( notes_hash_ids )

        return query_hash_ids

    def _DoOrPreds(
        self,
        file_search_context: ClientSearch.FileSearchContext,
        job_key: typing.Optional[ ClientThreading.JobKey ],
        or_predicates: typing.Collection[ ClientSearch.Predicate ],
        query_hash_ids: typing.Optional[ typing.Set[ int ] ]
    ) -> typing.Optional[ typing.Set[ int ] ]:
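        """
        Apply OR predicates. Each OR predicate is searched as the union of its sub-predicate
        searches and then intersected into query_hash_ids; shorter OR chains go first.
        """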
        # better typically to sort by fewest num of preds first, establishing query_hash_ids for longer chains
        def or_sort_key( p ):
            return len( p.GetValue() )

        or_predicates = sorted( or_predicates, key = or_sort_key )
        for or_predicate in or_predicates:
            # blue eyes OR green eyes
            or_query_hash_ids = set()
            or_subpredicates = or_predicate.GetValue()
            # [ blue eyes, green eyes ]
            for or_subpredicate in or_subpredicates:
                # blue eyes
                or_search_context = file_search_context.Duplicate()
                or_search_context.SetPredicates( [ or_subpredicate ] )
                # I pass query_hash_ids here to make these inefficient sub-searches (like -tag) potentially much faster
                or_query_hash_ids.update( self.GetHashIdsFromQuery( or_search_context, job_key, query_hash_ids = query_hash_ids, apply_implicit_limit = False, sort_by = None, limit_sort_by = None ) )
                if job_key.IsCancelled():
                    return set()
            query_hash_ids = intersection_update_qhi( query_hash_ids, or_query_hash_ids )

        return query_hash_ids

    def _DoSimpleRatingPreds( self, file_search_context: ClientSearch.FileSearchContext, query_hash_ids: typing.Optional[ typing.Set[ int ] ], job_key: typing.Optional[ ClientThreading.JobKey ] = None ) -> typing.Optional[ typing.Set[ int ] ]:
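        """
        Apply the straightforward rating predicates ('rated', star values, inc/dec counts)
        against the local_ratings and local_incdec_ratings tables; 'not rated' and the
        inc/dec zero-count cases are skipped here.
        """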
        cancelled_hook = None
        if job_key is not None:
            cancelled_hook = job_key.IsCancelled
        system_predicates = file_search_context.GetSystemPredicates()

        for ( operator, value, rating_service_key ) in system_predicates.GetRatingsPredicates():
            service_id = self.modules_services.GetServiceId( rating_service_key )
            if value == 'not rated':
                continue
            if value == 'rated':
                rating_hash_ids = self._STI( self._ExecuteCancellable( 'SELECT hash_id FROM local_ratings WHERE service_id = ?;', ( service_id, ), cancelled_hook ) )
                query_hash_ids = intersection_update_qhi( query_hash_ids, rating_hash_ids )
            else:
                service = HG.client_controller.services_manager.GetService( rating_service_key )
                service_type = service.GetServiceType()
                if service_type in HC.STAR_RATINGS_SERVICES:
                    if service.GetServiceType() == HC.LOCAL_RATING_LIKE:
                        half_a_star_value = 0.5
                    else:
                        one_star_value = service.GetOneStarValue()
                        half_a_star_value = one_star_value / 2
                    if isinstance( value, str ):
                        value = float( value )
                    # floats are a pain! as is storing rating as 0.0-1.0 and then allowing number of stars to change!
                    if operator == CC.UNICODE_ALMOST_EQUAL_TO:
                        predicate = str( ( value - half_a_star_value ) * 0.8 ) + ' < rating AND rating < ' + str( ( value + half_a_star_value ) * 1.2 )
                    elif operator == '<':
                        predicate = 'rating <= ' + str( value - half_a_star_value )
                    elif operator == '>':
                        predicate = 'rating > ' + str( value + half_a_star_value )
                    elif operator == '=':
                        predicate = str( value - half_a_star_value ) + ' < rating AND rating <= ' + str( value + half_a_star_value )
                    else:
                        continue
                    query = f'SELECT hash_id FROM local_ratings WHERE service_id = ? AND {predicate};'
                    rating_hash_ids = self._STI( self._ExecuteCancellable( query, ( service_id, ), cancelled_hook ) )
                    query_hash_ids = intersection_update_qhi( query_hash_ids, rating_hash_ids )
                elif service_type == HC.LOCAL_RATING_INCDEC:
                    if operator == '<' or ( operator == '=' and value == 0 ):
                        continue
                    else:
                        if operator == CC.UNICODE_ALMOST_EQUAL_TO:
                            min_value = max( value - 1, int( value * 0.8 ) )
                            max_value = min( value + 1, int( value * 1.2 ) )
                            predicate = '{} < rating AND rating < {}'.format( min_value, max_value )
                        else:
                            predicate = 'rating {} {}'.format( operator, value )
                        query = f'SELECT hash_id FROM local_incdec_ratings WHERE service_id = ? AND {predicate};'
                        rating_hash_ids = self._STI( self._ExecuteCancellable( query, ( service_id, ), cancelled_hook ) )
                        query_hash_ids = intersection_update_qhi( query_hash_ids, rating_hash_ids )

        return query_hash_ids

    def _DoTimestampPreds( self, file_search_context: ClientSearch.FileSearchContext, query_hash_ids: typing.Optional[ typing.Set[ int ] ], have_cross_referenced_file_locations: bool, job_key: typing.Optional[ ClientThreading.JobKey ] = None ) -> typing.Tuple[ typing.Optional[ typing.Set[ int ] ], bool ]:
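        """
        Apply import time, modified time, archived time, and last viewed time predicates,
        returning the updated query_hash_ids and whether a file-location cross-reference
        has now been performed.
        """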
        system_predicates = file_search_context.GetSystemPredicates()
        location_context = file_search_context.GetLocationContext()
        not_all_known_files = not location_context.IsAllKnownFiles()
        timestamp_ranges = system_predicates.GetTimestampRanges()
        cancelled_hook = None
        if job_key is not None:
            cancelled_hook = job_key.IsCancelled

        if not_all_known_files:
            # in future we will hang an explicit locationcontext off this predicate
            # for now we'll check current domain
            # if domain is deleted, we search deletion time
            if ClientSearch.PREDICATE_TYPE_SYSTEM_AGE in timestamp_ranges:
                import_timestamp_predicates = []
                ranges = timestamp_ranges[ ClientSearch.PREDICATE_TYPE_SYSTEM_AGE ]
                if '>' in ranges:
                    import_timestamp_predicates.append( 'timestamp >= {}'.format( ranges[ '>' ] ) )
                if '<' in ranges:
                    import_timestamp_predicates.append( 'timestamp <= {}'.format( ranges[ '<' ] ) )
                if len( import_timestamp_predicates ) > 0:
                    pred_string = ' AND '.join( import_timestamp_predicates )
                    table_names = []
                    table_names.extend( ( ClientDBFilesStorage.GenerateFilesTableName( self.modules_services.GetServiceId( service_key ), HC.CONTENT_STATUS_CURRENT ) for service_key in location_context.current_service_keys ) )
                    table_names.extend( ( ClientDBFilesStorage.GenerateFilesTableName( self.modules_services.GetServiceId( service_key ), HC.CONTENT_STATUS_DELETED ) for service_key in location_context.deleted_service_keys ) )
                    import_timestamp_hash_ids = set()
                    for table_name in table_names:
                        import_timestamp_hash_ids.update( self._STS( self._ExecuteCancellable( 'SELECT hash_id FROM {} WHERE {};'.format( table_name, pred_string ), (), cancelled_hook ) ) )
                    query_hash_ids = intersection_update_qhi( query_hash_ids, import_timestamp_hash_ids )
                    have_cross_referenced_file_locations = True

        if ClientSearch.PREDICATE_TYPE_SYSTEM_MODIFIED_TIME in timestamp_ranges:
            ranges = timestamp_ranges[ ClientSearch.PREDICATE_TYPE_SYSTEM_MODIFIED_TIME ]
            if len( ranges ) > 0:
                modified_timestamp_hash_ids = self.modules_files_timestamps.GetHashIdsInRange( HC.TIMESTAMP_TYPE_MODIFIED_AGGREGATE, ranges, job_key = job_key )
                query_hash_ids = intersection_update_qhi( query_hash_ids, modified_timestamp_hash_ids )

        if ClientSearch.PREDICATE_TYPE_SYSTEM_ARCHIVED_TIME in timestamp_ranges:
            ranges = timestamp_ranges[ ClientSearch.PREDICATE_TYPE_SYSTEM_ARCHIVED_TIME ]
            if len( ranges ) > 0:
                archived_timestamp_hash_ids = self.modules_files_timestamps.GetHashIdsInRange( HC.TIMESTAMP_TYPE_ARCHIVED, ranges, job_key = job_key )
                query_hash_ids = intersection_update_qhi( query_hash_ids, archived_timestamp_hash_ids )

        if ClientSearch.PREDICATE_TYPE_SYSTEM_LAST_VIEWED_TIME in timestamp_ranges:
            ranges = timestamp_ranges[ ClientSearch.PREDICATE_TYPE_SYSTEM_LAST_VIEWED_TIME ]
            min_last_viewed_timestamp = ranges.get( '>', None )
            max_last_viewed_timestamp = ranges.get( '<', None )
            last_viewed_timestamp_hash_ids = self.modules_files_viewing_stats.GetHashIdsFromLastViewed( min_last_viewed_timestamp = min_last_viewed_timestamp, max_last_viewed_timestamp = max_last_viewed_timestamp, job_key = job_key )
            query_hash_ids = intersection_update_qhi( query_hash_ids, last_viewed_timestamp_hash_ids )

        return ( query_hash_ids, have_cross_referenced_file_locations )

    def GetHashIdsFromQuery(
        self,
        file_search_context: ClientSearch.FileSearchContext,
        job_key: typing.Optional[ ClientThreading.JobKey ] = None,
        query_hash_ids: typing.Optional[ set ] = None,
        apply_implicit_limit: bool = True,
        sort_by: typing.Optional[ ClientMedia.MediaSort ] = None,
        limit_sort_by: typing.Optional[ ClientMedia.MediaSort ] = None
    ) -> typing.List[ int ]:
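        """
        The main search routine: convert the file search context into hash_ids, applying
        predicates roughly from the cheapest or most selective to the most expensive and
        cross-referencing the file location along the way.
        """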
        if job_key is None:
            job_key = ClientThreading.JobKey( cancellable = True )
        if query_hash_ids is not None:
            query_hash_ids = set( query_hash_ids )
        have_cross_referenced_file_locations = False
        HG.client_controller.ResetIdleTimer()

        system_predicates = file_search_context.GetSystemPredicates()
        system_limit = system_predicates.GetLimit( apply_implicit_limit = apply_implicit_limit )
        if system_limit == 0:
            return []

        location_context = file_search_context.GetLocationContext()
        tag_context = file_search_context.GetTagContext()
        tag_service_key = tag_context.service_key
        if location_context.IsEmpty():
            return []

        current_file_service_ids = set()
        for current_service_key in location_context.current_service_keys:
            try:
                current_file_service_id = self.modules_services.GetServiceId( current_service_key )
            except HydrusExceptions.DataMissing:
                HydrusData.ShowText( 'A file search query was run for a file service that does not exist! If you just removed a service, you might want to try checking the search and/or restarting the client.' )
                return []
            current_file_service_ids.add( current_file_service_id )

        deleted_file_service_ids = set()
        for deleted_service_key in location_context.deleted_service_keys:
            try:
                deleted_file_service_id = self.modules_services.GetServiceId( deleted_service_key )
            except HydrusExceptions.DataMissing:
                HydrusData.ShowText( 'A file search query was run for a file service that does not exist! If you just removed a service, you might want to try checking the search and/or restarting the client.' )
                return []
            deleted_file_service_ids.add( deleted_file_service_id )

        db_location_context = self.modules_files_storage.GetDBLocationContext( location_context )

        try:
            tag_service_id = self.modules_services.GetServiceId( tag_service_key )
        except HydrusExceptions.DataMissing:
            HydrusData.ShowText( 'A file search query was run for a tag service that does not exist! If you just removed a service, you might want to check the search and/or restart the client.' )
            return []

        tags_to_include = file_search_context.GetTagsToInclude()
        tags_to_exclude = file_search_context.GetTagsToExclude()
        namespaces_to_include = file_search_context.GetNamespacesToInclude()
        namespaces_to_exclude = file_search_context.GetNamespacesToExclude()
        wildcards_to_include = file_search_context.GetWildcardsToInclude()
        wildcards_to_exclude = file_search_context.GetWildcardsToExclude()
        simple_preds = system_predicates.GetSimpleInfo()
        king_filter = system_predicates.GetKingFilter()
        or_predicates = file_search_context.GetORPredicates()
        not_all_known_files = not location_context.IsAllKnownFiles()
        there_are_tags_to_search = len( tags_to_include ) > 0 or len( namespaces_to_include ) > 0 or len( wildcards_to_include ) > 0

        # ok, let's set up the big list of simple search preds
        files_info_predicates = GetFilesInfoPredicates( system_predicates )
        there_are_simple_files_info_preds_to_search_for = len( files_info_predicates ) > 0

        #

        done_or_predicates = len( or_predicates ) == 0

        # OR round one--if nothing else will be fast, let's prep query_hash_ids now
        if not done_or_predicates and not ( there_are_tags_to_search or there_are_simple_files_info_preds_to_search_for ):
            query_hash_ids = self._DoOrPreds( file_search_context, job_key, or_predicates, query_hash_ids )
            have_cross_referenced_file_locations = True
            done_or_predicates = True

        if job_key.IsCancelled():
            return []

        #

        if 'hash' in simple_preds:
            ( search_hashes, search_hash_type, inclusive ) = simple_preds[ 'hash' ]
            if inclusive:
                if search_hash_type == 'sha256':
                    matching_sha256_hashes = [ search_hash for search_hash in search_hashes if self.modules_hashes.HasHash( search_hash ) ]
                else:
                    source_to_desired = self.modules_hashes.GetFileHashes( search_hashes, search_hash_type, 'sha256' )
                    matching_sha256_hashes = list( source_to_desired.values() )
                specific_hash_ids = self.modules_hashes_local_cache.GetHashIds( matching_sha256_hashes )
                query_hash_ids = intersection_update_qhi( query_hash_ids, specific_hash_ids )

        #

        ( query_hash_ids, have_cross_referenced_file_locations ) = self._DoTimestampPreds( file_search_context, query_hash_ids, have_cross_referenced_file_locations, job_key = job_key )
        query_hash_ids = self._DoSimpleRatingPreds( file_search_context, query_hash_ids, job_key = job_key )

        #
        for ( view_type, viewing_locations, operator, viewing_value ) in system_predicates.GetFileViewingStatsPredicates():
            only_do_zero = ( operator in ( '=', CC.UNICODE_ALMOST_EQUAL_TO ) and viewing_value == 0 ) or ( operator == '<' and viewing_value == 1 )
            include_zero = operator == '<'
            if only_do_zero:
                continue
            elif include_zero:
                continue
            else:
                viewing_hash_ids = self.modules_files_viewing_stats.GetHashIdsFromFileViewingStatistics( view_type, viewing_locations, operator, viewing_value )
                query_hash_ids = intersection_update_qhi( query_hash_ids, viewing_hash_ids )

        for ( operator, num_relationships, dupe_type ) in system_predicates.GetDuplicateRelationshipCountPredicates():
            only_do_zero = ( operator in ( '=', CC.UNICODE_ALMOST_EQUAL_TO ) and num_relationships == 0 ) or ( operator == '<' and num_relationships == 1 )
            include_zero = operator == '<'
            if only_do_zero:
                continue
            elif include_zero:
                continue
            else:
                dupe_hash_ids = self.modules_files_duplicates.GetHashIdsFromDuplicateCountPredicate( db_location_context, operator, num_relationships, dupe_type )
                query_hash_ids = intersection_update_qhi( query_hash_ids, dupe_hash_ids )
                have_cross_referenced_file_locations = True

        if system_predicates.HasSimilarToData():
            ( pixel_hashes, perceptual_hashes, max_hamming ) = system_predicates.GetSimilarToData()
            all_similar_hash_ids = set()
            pixel_hash_ids = set()
            for pixel_hash in pixel_hashes:
                if self.modules_hashes.HasHash( pixel_hash ):
                    pixel_hash_id = self.modules_hashes_local_cache.GetHashId( pixel_hash )
                    pixel_hash_ids.add( pixel_hash_id )
            if len( pixel_hash_ids ) > 0:
                similar_hash_ids_and_distances = self.modules_similar_files.SearchPixelHashes( pixel_hash_ids )
                similar_hash_ids = [ similar_hash_id for ( similar_hash_id, distance ) in similar_hash_ids_and_distances ]
                all_similar_hash_ids.update( similar_hash_ids )
            if len( perceptual_hashes ) > 0:
                similar_hash_ids_and_distances = self.modules_similar_files.SearchPerceptualHashes( perceptual_hashes, max_hamming )
                similar_hash_ids = [ similar_hash_id for ( similar_hash_id, distance ) in similar_hash_ids_and_distances ]
                all_similar_hash_ids.update( similar_hash_ids )
            query_hash_ids = intersection_update_qhi( query_hash_ids, all_similar_hash_ids )

        if system_predicates.HasSimilarToFiles():
            ( similar_to_hashes, max_hamming ) = system_predicates.GetSimilarToFiles()
            all_similar_hash_ids = set()
            for similar_to_hash in similar_to_hashes:
                hash_id = self.modules_hashes_local_cache.GetHashId( similar_to_hash )
                similar_hash_ids_and_distances = self.modules_similar_files.SearchFile( hash_id, max_hamming )
                similar_hash_ids = [ similar_hash_id for ( similar_hash_id, distance ) in similar_hash_ids_and_distances ]
                all_similar_hash_ids.update( similar_hash_ids )
            query_hash_ids = intersection_update_qhi( query_hash_ids, all_similar_hash_ids )

        is_inbox = system_predicates.MustBeInbox()
        if is_inbox:
            query_hash_ids = intersection_update_qhi( query_hash_ids, self.modules_files_inbox.inbox_hash_ids, force_create_new_set = True )

        #

        # last shot before tags and stuff to try to do these. we can only do them if query hash ids has stuff in
        done_tricky_incdec_ratings = False
        if query_hash_ids is not None:
            done_tricky_incdec_ratings = True
            for ( operator, value, rating_service_key ) in system_predicates.GetRatingsPredicates():
                if isinstance( value, int ):
                    service_id = self.modules_services.GetServiceId( rating_service_key )
                    service = HG.client_controller.services_manager.GetService( rating_service_key )
                    service_type = service.GetServiceType()
                    if service_type == HC.LOCAL_RATING_INCDEC:
                        if operator == '<' or ( operator == '=' and value == 0 ):
                            rated_hash_ids = self._STI( self._Execute( 'SELECT hash_id FROM local_incdec_ratings WHERE service_id = ?;', ( service_id, ) ) )
                            not_rated_hash_ids = query_hash_ids.difference( rated_hash_ids )
                            # 'no rating' for incdec = 0
                            rating_hash_ids = not_rated_hash_ids
                            if operator == '<' and value > 1:
                                less_than_rating_hash_ids = self._STI( self._Execute( 'SELECT hash_id FROM local_incdec_ratings WHERE service_id = ? AND rating < ?;', ( service_id, value ) ) )
                                rating_hash_ids.update( less_than_rating_hash_ids )
                            query_hash_ids = intersection_update_qhi( query_hash_ids, rating_hash_ids )
        # first tags
        if there_are_tags_to_search:
            def sort_longest_tag_first_key( s ):
                return ( 1 if HydrusTags.IsUnnamespaced( s ) else 0, -len( s ) )

            tags_to_include = list( tags_to_include )
            tags_to_include.sort( key = sort_longest_tag_first_key )
            for tag in tags_to_include:
                if query_hash_ids is None:
                    tag_query_hash_ids = self.modules_files_search_tags.GetHashIdsFromTag( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, tag, job_key = job_key )
                elif is_inbox and len( query_hash_ids ) == len( self.modules_files_inbox.inbox_hash_ids ):
                    tag_query_hash_ids = self.modules_files_search_tags.GetHashIdsFromTag( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, tag, hash_ids = self.modules_files_inbox.inbox_hash_ids, hash_ids_table_name = 'file_inbox', job_key = job_key )
                else:
                    with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
                        tag_query_hash_ids = self.modules_files_search_tags.GetHashIdsFromTag( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, tag, hash_ids = query_hash_ids, hash_ids_table_name = temp_table_name, job_key = job_key )
                query_hash_ids = intersection_update_qhi( query_hash_ids, tag_query_hash_ids )
                have_cross_referenced_file_locations = True
                if len( query_hash_ids ) == 0:
                    return []

        namespaces_to_include = list( namespaces_to_include )
        namespaces_to_include.sort( key = lambda n: -len( n ) )
        for namespace in namespaces_to_include:
            if query_hash_ids is None or ( is_inbox and len( query_hash_ids ) == len( self.modules_files_inbox.inbox_hash_ids ) ):
                namespace_query_hash_ids = self.modules_files_search_tags.GetHashIdsThatHaveTagsComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, namespace_wildcard = namespace, job_key = job_key )
            else:
                with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
                    self._AnalyzeTempTable( temp_table_name )
                    namespace_query_hash_ids = self.modules_files_search_tags.GetHashIdsThatHaveTagsComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, namespace_wildcard = namespace, hash_ids_table_name = temp_table_name, job_key = job_key )
            query_hash_ids = intersection_update_qhi( query_hash_ids, namespace_query_hash_ids )
            have_cross_referenced_file_locations = True
            if len( query_hash_ids ) == 0:
                return []

        wildcards_to_include = list( wildcards_to_include )
        wildcards_to_include.sort( key = lambda w: -len( w ) )
        for wildcard in wildcards_to_include:
            if query_hash_ids is None:
                wildcard_query_hash_ids = self.modules_files_search_tags.GetHashIdsFromWildcardComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, wildcard, job_key = job_key )
            else:
                with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
                    self._AnalyzeTempTable( temp_table_name )
                    wildcard_query_hash_ids = self.modules_files_search_tags.GetHashIdsFromWildcardComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, wildcard, hash_ids = query_hash_ids, hash_ids_table_name = temp_table_name, job_key = job_key )
            query_hash_ids = intersection_update_qhi( query_hash_ids, wildcard_query_hash_ids )
            have_cross_referenced_file_locations = True
            if len( query_hash_ids ) == 0:
                return []
        #

        # OR round two--if file preds will not be fast, let's step in to reduce the file domain search space
        if not done_or_predicates and not there_are_simple_files_info_preds_to_search_for:
            query_hash_ids = self._DoOrPreds( file_search_context, job_key, or_predicates, query_hash_ids )
            have_cross_referenced_file_locations = True
            done_or_predicates = True

        if job_key.IsCancelled():
            return []

        # now the simple preds and desperate last shot to populate query_hash_ids
        done_files_info_predicates = False
        we_need_some_results = query_hash_ids is None
        we_need_to_cross_reference = not_all_known_files and not have_cross_referenced_file_locations
        if we_need_some_results or we_need_to_cross_reference:
            if location_context.IsAllKnownFiles():
                query_hash_ids = intersection_update_qhi( query_hash_ids, self.modules_files_search_tags.GetHashIdsThatHaveTagsComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, job_key = job_key ) )
            else:
                if len( files_info_predicates ) == 0:
                    files_info_predicates.insert( 0, '1=1' )
                    include_files_info = False
                else:
                    include_files_info = True

                file_info_query_hash_ids = set()
                for files_table_name in db_location_context.GetMultipleFilesTableNames():
                    if include_files_info:
                        # if a file is missing a files_info row, we can't search it with a file system pred. it is just unknown
                        files_table_name = '{} NATURAL JOIN files_info'.format( files_table_name )
                    if query_hash_ids is None:
                        loop_query_hash_ids = self._STS( self._Execute( 'SELECT hash_id FROM {} WHERE {};'.format( files_table_name, ' AND '.join( files_info_predicates ) ) ) )
                    else:
                        if is_inbox and len( query_hash_ids ) == len( self.modules_files_inbox.inbox_hash_ids ):
                            loop_query_hash_ids = self._STS( self._Execute( 'SELECT hash_id FROM {} NATURAL JOIN {} WHERE {};'.format( 'file_inbox', files_table_name, ' AND '.join( files_info_predicates ) ) ) )
                        else:
                            with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
                                self._AnalyzeTempTable( temp_table_name )
                                loop_query_hash_ids = self._STS( self._Execute( 'SELECT hash_id FROM {} NATURAL JOIN {} WHERE {};'.format( temp_table_name, files_table_name, ' AND '.join( files_info_predicates ) ) ) )
                    if len( file_info_query_hash_ids ) == 0:
                        file_info_query_hash_ids = loop_query_hash_ids
                    else:
                        file_info_query_hash_ids.update( loop_query_hash_ids )

                query_hash_ids = intersection_update_qhi( query_hash_ids, file_info_query_hash_ids )
                have_cross_referenced_file_locations = True
                done_files_info_predicates = True
# at this point, query_hash_ids has something in it
# if we couldn't do them earlier, now we can
if not done_tricky_incdec_ratings:
done_tricky_incdec_ratings = True
for ( operator, value, rating_service_key ) in system_predicates.GetRatingsPredicates():
if isinstance( value, int ):
service_id = self.modules_services.GetServiceId( rating_service_key )
service = HG.client_controller.services_manager.GetService( rating_service_key )
service_type = service.GetServiceType()
if service_type == HC.LOCAL_RATING_INCDEC:
if operator == '<' or ( operator == '=' and value == 0 ):
rated_hash_ids = self._STI( self._Execute( 'SELECT hash_id FROM local_incdec_ratings WHERE service_id = ?;', ( service_id, ) ) )
not_rated_hash_ids = query_hash_ids.difference( rated_hash_ids )
# 'no rating' for incdec = 0
rating_hash_ids = not_rated_hash_ids
if operator == '<' and value > 1:
less_than_rating_hash_ids = self._STI( self._Execute( 'SELECT hash_id FROM local_incdec_ratings WHERE service_id = ? AND rating < ?;', ( service_id, value ) ) )
rating_hash_ids.update( less_than_rating_hash_ids )
query_hash_ids = intersection_update_qhi( query_hash_ids, rating_hash_ids )
if 'hash' in simple_preds:
( search_hashes, search_hash_type, inclusive ) = simple_preds[ 'hash' ]
if not inclusive:
if search_hash_type == 'sha256':
matching_sha256_hashes = [ search_hash for search_hash in search_hashes if self.modules_hashes.HasHash( search_hash ) ]
else:
source_to_desired = self.modules_hashes.GetFileHashes( search_hashes, search_hash_type, 'sha256' )
matching_sha256_hashes = list( source_to_desired.values() )
specific_hash_ids = self.modules_hashes_local_cache.GetHashIds( matching_sha256_hashes )
query_hash_ids.difference_update( specific_hash_ids )
if 'has_exif' in simple_preds:
has_exif = simple_preds[ 'has_exif' ]
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_hash_ids_table_name:
has_exif_hash_ids = self.modules_files_metadata_basic.GetHasEXIFHashIds( temp_hash_ids_table_name )
if has_exif:
query_hash_ids.intersection_update( has_exif_hash_ids )
else:
query_hash_ids.difference_update( has_exif_hash_ids )
if 'has_human_readable_embedded_metadata' in simple_preds:
has_human_readable_embedded_metadata = simple_preds[ 'has_human_readable_embedded_metadata' ]
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_hash_ids_table_name:
has_human_readable_embedded_metadata_hash_ids = self.modules_files_metadata_basic.GetHasHumanReadableEmbeddedMetadataHashIds( temp_hash_ids_table_name )
if has_human_readable_embedded_metadata:
query_hash_ids.intersection_update( has_human_readable_embedded_metadata_hash_ids )
else:
query_hash_ids.difference_update( has_human_readable_embedded_metadata_hash_ids )
if 'has_icc_profile' in simple_preds:
has_icc_profile = simple_preds[ 'has_icc_profile' ]
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_hash_ids_table_name:
has_icc_profile_hash_ids = self.modules_files_metadata_basic.GetHasICCProfileHashIds( temp_hash_ids_table_name )
if has_icc_profile:
query_hash_ids.intersection_update( has_icc_profile_hash_ids )
else:
query_hash_ids.difference_update( has_icc_profile_hash_ids )
if system_predicates.MustBeArchive():
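# an archived file is simply one that is not in the inbox, so subtract the inbox ids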
query_hash_ids.difference_update( self.modules_files_inbox.inbox_hash_ids )
if king_filter is not None and king_filter:
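# keep only files that are the king (the representative best file) of their duplicate group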
king_hash_ids = self.modules_files_duplicates.FilterKingHashIds( query_hash_ids )
query_hash_ids = intersection_update_qhi( query_hash_ids, king_hash_ids )
if there_are_simple_files_info_preds_to_search_for and not done_files_info_predicates:
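# the files_info preds were not folded into a location query above, so apply them now against the current results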
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
self._AnalyzeTempTable( temp_table_name )
predicate_string = ' AND '.join( files_info_predicates )
select = 'SELECT hash_id FROM {} NATURAL JOIN files_info WHERE {};'.format( temp_table_name, predicate_string )
files_info_hash_ids = self._STI( self._Execute( select ) )
query_hash_ids = intersection_update_qhi( query_hash_ids, files_info_hash_ids )
done_files_info_predicates = True
if job_key.IsCancelled():
return []
#
# OR round three--the final and preferred place for OR preds to kick in. query_hash_ids is now set, so this shouldn't be super slow for most scenarios
if not done_or_predicates:
query_hash_ids = self._DoOrPreds( file_search_context, job_key, or_predicates, query_hash_ids )
done_or_predicates = True
if job_key.IsCancelled():
return []
# hide update files
if location_context.IsAllLocalFiles():
repo_update_hash_ids = set( self.modules_files_storage.GetCurrentHashIdsList( self.modules_services.local_update_service_id ) )
query_hash_ids.difference_update( repo_update_hash_ids )
# now subtract bad results
if len( tags_to_exclude ) + len( namespaces_to_exclude ) + len( wildcards_to_exclude ) > 0:
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
self._AnalyzeTempTable( temp_table_name )
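# as each exclusion below is applied, its hits are also deleted from the temp table so later lookups have less to scan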
for tag in tags_to_exclude:
unwanted_hash_ids = self.modules_files_search_tags.GetHashIdsFromTag( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, tag, hash_ids = query_hash_ids, hash_ids_table_name = temp_table_name, job_key = job_key )
query_hash_ids.difference_update( unwanted_hash_ids )
if len( query_hash_ids ) == 0:
return []
self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( temp_table_name ), ( ( hash_id, ) for hash_id in unwanted_hash_ids ) )
for namespace in namespaces_to_exclude:
unwanted_hash_ids = self.modules_files_search_tags.GetHashIdsThatHaveTagsComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, namespace_wildcard = namespace, hash_ids_table_name = temp_table_name, job_key = job_key )
query_hash_ids.difference_update( unwanted_hash_ids )
if len( query_hash_ids ) == 0:
return []
self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( temp_table_name ), ( ( hash_id, ) for hash_id in unwanted_hash_ids ) )
for wildcard in wildcards_to_exclude:
unwanted_hash_ids = self.modules_files_search_tags.GetHashIdsFromWildcardComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, wildcard, hash_ids = query_hash_ids, hash_ids_table_name = temp_table_name, job_key = job_key )
query_hash_ids.difference_update( unwanted_hash_ids )
if len( query_hash_ids ) == 0:
return []
self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( temp_table_name ), ( ( hash_id, ) for hash_id in unwanted_hash_ids ) )
if job_key.IsCancelled():
return []
#
( required_file_service_statuses, excluded_file_service_statuses ) = system_predicates.GetFileServiceStatuses()
# needs query_hash_ids to have something in it!
for ( service_key, statuses ) in required_file_service_statuses.items():
service_id = self.modules_services.GetServiceId( service_key )
for status in statuses:
required_hash_ids = self.modules_files_storage.FilterHashIdsToStatus( service_id, query_hash_ids, status )
query_hash_ids = intersection_update_qhi( query_hash_ids, required_hash_ids )
for ( service_key, statuses ) in excluded_file_service_statuses.items():
service_id = self.modules_services.GetServiceId( service_key )
for status in statuses:
excluded_hash_ids = self.modules_files_storage.FilterHashIdsToStatus( service_id, query_hash_ids, status )
query_hash_ids.difference_update( excluded_hash_ids )
#
for ( operator, value, service_key ) in system_predicates.GetRatingsPredicates():
service_id = self.modules_services.GetServiceId( service_key )
if value == 'not rated':
query_hash_ids.difference_update( self._STI( self._Execute( 'SELECT hash_id FROM local_ratings WHERE service_id = ?;', ( service_id, ) ) ) )
if king_filter is not None and not king_filter:
king_hash_ids = self.modules_files_duplicates.FilterKingHashIds( query_hash_ids )
query_hash_ids.difference_update( king_hash_ids )
for ( operator, num_relationships, dupe_type ) in system_predicates.GetDuplicateRelationshipCountPredicates():
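# files with zero duplicate relationships have no rows to count, so 'exactly/about zero' is computed as the complement of 'more than zero', and a '<' pred unions that zero set with the accurately counted non-zero results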
only_do_zero = ( operator in ( '=', CC.UNICODE_ALMOST_EQUAL_TO ) and num_relationships == 0 ) or ( operator == '<' and num_relationships == 1 )
include_zero = operator == '<'
if only_do_zero:
nonzero_hash_ids = self.modules_files_duplicates.GetHashIdsFromDuplicateCountPredicate( db_location_context, '>', 0, dupe_type )
query_hash_ids.difference_update( nonzero_hash_ids )
elif include_zero:
nonzero_hash_ids = self.modules_files_duplicates.GetHashIdsFromDuplicateCountPredicate( db_location_context, '>', 0, dupe_type )
zero_hash_ids = query_hash_ids.difference( nonzero_hash_ids )
accurate_except_zero_hash_ids = self.modules_files_duplicates.GetHashIdsFromDuplicateCountPredicate( db_location_context, operator, num_relationships, dupe_type )
hash_ids = zero_hash_ids.union( accurate_except_zero_hash_ids )
query_hash_ids = intersection_update_qhi( query_hash_ids, hash_ids )
query_hash_ids = self._DoNotePreds( system_predicates, query_hash_ids, job_key = job_key )
for ( view_type, viewing_locations, operator, viewing_value ) in system_predicates.GetFileViewingStatsPredicates():
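# same zero-count complement trick as the duplicate relationship preds above: files with no viewing stats rows count as zero views/viewtime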
only_do_zero = ( operator in ( '=', CC.UNICODE_ALMOST_EQUAL_TO ) and viewing_value == 0 ) or ( operator == '<' and viewing_value == 1 )
include_zero = operator == '<'
if only_do_zero:
nonzero_hash_ids = self.modules_files_viewing_stats.GetHashIdsFromFileViewingStatistics( view_type, viewing_locations, '>', 0 )
query_hash_ids.difference_update( nonzero_hash_ids )
elif include_zero:
nonzero_hash_ids = self.modules_files_viewing_stats.GetHashIdsFromFileViewingStatistics( view_type, viewing_locations, '>', 0 )
zero_hash_ids = query_hash_ids.difference( nonzero_hash_ids )
accurate_except_zero_hash_ids = self.modules_files_viewing_stats.GetHashIdsFromFileViewingStatistics( view_type, viewing_locations, operator, viewing_value )
hash_ids = zero_hash_ids.union( accurate_except_zero_hash_ids )
query_hash_ids = intersection_update_qhi( query_hash_ids, hash_ids )
if job_key.IsCancelled():
return []
#
file_location_is_all_local = self.modules_services.LocationContextIsCoveredByCombinedLocalFiles( location_context )
file_location_is_all_combined_local_files_deleted = location_context.IsOneDomain() and CC.COMBINED_LOCAL_FILE_SERVICE_KEY in location_context.deleted_service_keys
must_be_local = system_predicates.MustBeLocal() or system_predicates.MustBeArchive()
must_not_be_local = system_predicates.MustNotBeLocal()
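# if the search domain already guarantees locality one way or the other, we can answer these preds without touching the file tables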
if file_location_is_all_local:
# if must be all local, we are great already
if must_not_be_local:
query_hash_ids = set()
elif file_location_is_all_combined_local_files_deleted:
if must_be_local:
query_hash_ids = set()
elif must_be_local or must_not_be_local:
if must_be_local:
query_hash_ids = self.modules_files_storage.FilterHashIdsToStatus( self.modules_services.combined_local_file_service_id, query_hash_ids, HC.CONTENT_STATUS_CURRENT )
elif must_not_be_local:
local_hash_ids = self.modules_files_storage.GetCurrentHashIdsList( self.modules_services.combined_local_file_service_id )
query_hash_ids.difference_update( local_hash_ids )
#
if 'known_url_rules' in simple_preds:
for ( operator, rule_type, rule ) in simple_preds[ 'known_url_rules' ]:
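# exact url matches, or a search that is just the inbox, can hit the url map without a temp table; other rule types are filtered against the current results via a temp table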
if rule_type == 'exact_match' or ( is_inbox and len( query_hash_ids ) == len( self.modules_files_inbox.inbox_hash_ids ) ):
url_hash_ids = self.modules_url_map.GetHashIdsFromURLRule( rule_type, rule )
else:
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
self._AnalyzeTempTable( temp_table_name )
url_hash_ids = self.modules_url_map.GetHashIdsFromURLRule( rule_type, rule, hash_ids = query_hash_ids, hash_ids_table_name = temp_table_name )
if operator: # inclusive
query_hash_ids = intersection_update_qhi( query_hash_ids, url_hash_ids )
else:
query_hash_ids.difference_update( url_hash_ids )
#
namespaces_to_tests = system_predicates.GetNumTagsNumberTests()
for ( namespace, number_tests ) in namespaces_to_tests.items():
namespace_wildcard = namespace
if namespace_wildcard is None:
namespace_wildcard = '*'
is_zero = True in ( number_test.IsZero() for number_test in number_tests )
is_anything_but_zero = True in ( number_test.IsAnythingButZero() for number_test in number_tests )
specific_number_tests = [ number_test for number_test in number_tests if not ( number_test.IsZero() or number_test.IsAnythingButZero() ) ]
lambdas = [ number_test.GetLambda() for number_test in specific_number_tests ]
megalambda = lambda x: False not in ( l( x ) for l in lambdas )
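# files with zero tags produce no nonzero-count rows, so the 'zero'/'anything but zero' tests are resolved via the complement of 'has any tag in this namespace'; the specific numeric tests are then applied to the fetched ( hash_id, count ) pairs with the combined lambda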
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
self._AnalyzeTempTable( temp_table_name )
nonzero_tag_query_hash_ids = set()
nonzero_tag_query_hash_ids_populated = False
if is_zero or is_anything_but_zero:
nonzero_tag_query_hash_ids = self.modules_files_search_tags.GetHashIdsThatHaveTagsComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, hash_ids_table_name = temp_table_name, namespace_wildcard = namespace_wildcard, job_key = job_key )
nonzero_tag_query_hash_ids_populated = True
if is_zero:
query_hash_ids.difference_update( nonzero_tag_query_hash_ids )
if is_anything_but_zero:
query_hash_ids = intersection_update_qhi( query_hash_ids, nonzero_tag_query_hash_ids )
if len( specific_number_tests ) > 0:
hash_id_tag_counts = self.modules_files_search_tags.GetHashIdsAndNonZeroTagCounts( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, query_hash_ids, namespace_wildcard = namespace_wildcard, job_key = job_key )
good_tag_count_hash_ids = { hash_id for ( hash_id, count ) in hash_id_tag_counts if megalambda( count ) }
if megalambda( 0 ): # files with zero count are needed
if not nonzero_tag_query_hash_ids_populated:
nonzero_tag_query_hash_ids = { hash_id for ( hash_id, count ) in hash_id_tag_counts }
zero_hash_ids = query_hash_ids.difference( nonzero_tag_query_hash_ids )
good_tag_count_hash_ids.update( zero_hash_ids )
query_hash_ids = intersection_update_qhi( query_hash_ids, good_tag_count_hash_ids )
if job_key.IsCancelled():
return []
#
if 'min_tag_as_number' in simple_preds:
( namespace_wildcard, num ) = simple_preds[ 'min_tag_as_number' ]
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
self._AnalyzeTempTable( temp_table_name )
good_hash_ids = self.modules_files_search_tags.GetHashIdsThatHaveTagAsNumComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, namespace_wildcard, num, '>', hash_ids = query_hash_ids, hash_ids_table_name = temp_table_name, job_key = job_key )
query_hash_ids = intersection_update_qhi( query_hash_ids, good_hash_ids )
if 'max_tag_as_number' in simple_preds:
( namespace_wildcard, num ) = simple_preds[ 'max_tag_as_number' ]
with self._MakeTemporaryIntegerTable( query_hash_ids, 'hash_id' ) as temp_table_name:
self._AnalyzeTempTable( temp_table_name )
good_hash_ids = self.modules_files_search_tags.GetHashIdsThatHaveTagAsNumComplexLocation( ClientTags.TAG_DISPLAY_ACTUAL, location_context, tag_context, namespace_wildcard, num, '<', hash_ids = query_hash_ids, hash_ids_table_name = temp_table_name, job_key = job_key )
query_hash_ids = intersection_update_qhi( query_hash_ids, good_hash_ids )
if job_key.IsCancelled():
return []
#
query_hash_ids = list( query_hash_ids )
#
we_are_applying_limit = system_limit is not None and system_limit < len( query_hash_ids )
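# if a limit is going to clip the results and the caller did not ask for a sort, fall back to the limit's preferred sort so the clipped results are the intended ones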
if we_are_applying_limit and limit_sort_by is not None and sort_by is None:
sort_by = limit_sort_by
did_sort = False
if sort_by is not None and not location_context.IsAllKnownFiles():
( did_sort, query_hash_ids ) = self.TryToSortHashIds( location_context, query_hash_ids, sort_by )
#
if we_are_applying_limit:
if not did_sort:
query_hash_ids = random.sample( query_hash_ids, system_limit )
else:
query_hash_ids = query_hash_ids[:system_limit]
return query_hash_ids
def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
tables_and_columns = []
return tables_and_columns
def PopulateSearchIntoTempTable( self, file_search_context: ClientSearch.FileSearchContext, temp_table_name: str ) -> typing.List[ int ]:
query_hash_ids = self.GetHashIdsFromQuery( file_search_context, apply_implicit_limit = False )
self._ExecuteMany( 'INSERT OR IGNORE INTO {} ( hash_id ) VALUES ( ? );'.format( temp_table_name ), ( ( hash_id, ) for hash_id in query_hash_ids ) )
self._AnalyzeTempTable( temp_table_name )
return query_hash_ids
def TryToSortHashIds( self, location_context: ClientLocation.LocationContext, hash_ids, sort_by: ClientMedia.MediaSort ):
did_sort = False
( sort_metadata, sort_data ) = sort_by.sort_type
sort_order = sort_by.sort_order
query = None
key = lambda x: 1
reverse = False
if sort_metadata == 'system':
simple_sorts = [
CC.SORT_FILES_BY_IMPORT_TIME,
CC.SORT_FILES_BY_FILESIZE,
CC.SORT_FILES_BY_DURATION,
CC.SORT_FILES_BY_FRAMERATE,
CC.SORT_FILES_BY_NUM_FRAMES,
CC.SORT_FILES_BY_WIDTH,
CC.SORT_FILES_BY_HEIGHT,
CC.SORT_FILES_BY_RATIO,
CC.SORT_FILES_BY_NUM_PIXELS,
CC.SORT_FILES_BY_MEDIA_VIEWS,
CC.SORT_FILES_BY_MEDIA_VIEWTIME,
CC.SORT_FILES_BY_APPROX_BITRATE,
CC.SORT_FILES_BY_FILE_MODIFIED_TIMESTAMP,
CC.SORT_FILES_BY_LAST_VIEWED_TIME,
CC.SORT_FILES_BY_ARCHIVED_TIMESTAMP
]
if sort_data in simple_sorts:
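# the simple sorts all fetch the relevant column(s) through the temp table query built below and are then sorted in python with a per-sort key function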
if sort_data == CC.SORT_FILES_BY_IMPORT_TIME:
if location_context.IsOneDomain() and location_context.IncludesCurrent():
file_service_key = list( location_context.current_service_keys )[0]
else:
file_service_key = CC.COMBINED_LOCAL_FILE_SERVICE_KEY
file_service_id = self.modules_services.GetServiceId( file_service_key )
current_files_table_name = ClientDBFilesStorage.GenerateFilesTableName( file_service_id, HC.CONTENT_STATUS_CURRENT )
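# note the {temp_table} placeholder is deliberately re-emitted by this .format call so it can be filled in later, once the temp table actually exists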
query = 'SELECT hash_id, timestamp FROM {temp_table} CROSS JOIN {current_files_table} USING ( hash_id );'.format( temp_table = '{temp_table}', current_files_table = current_files_table_name )
elif sort_data == CC.SORT_FILES_BY_FILESIZE:
query = 'SELECT hash_id, size FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_DURATION:
query = 'SELECT hash_id, duration FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_FRAMERATE:
query = 'SELECT hash_id, num_frames, duration FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_NUM_FRAMES:
query = 'SELECT hash_id, num_frames FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_WIDTH:
query = 'SELECT hash_id, width FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_HEIGHT:
query = 'SELECT hash_id, height FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_RATIO:
query = 'SELECT hash_id, width, height FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_NUM_PIXELS:
query = 'SELECT hash_id, width, height FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_MEDIA_VIEWS:
query = 'SELECT hash_id, views FROM {temp_table} CROSS JOIN file_viewing_stats USING ( hash_id ) WHERE canvas_type = {canvas_type};'.format( temp_table = '{temp_table}', canvas_type = CC.CANVAS_MEDIA_VIEWER )
elif sort_data == CC.SORT_FILES_BY_MEDIA_VIEWTIME:
query = 'SELECT hash_id, viewtime FROM {temp_table} CROSS JOIN file_viewing_stats USING ( hash_id ) WHERE canvas_type = {canvas_type};'.format( temp_table = '{temp_table}', canvas_type = CC.CANVAS_MEDIA_VIEWER )
elif sort_data == CC.SORT_FILES_BY_APPROX_BITRATE:
query = 'SELECT hash_id, duration, num_frames, size, width, height FROM {temp_table} CROSS JOIN files_info USING ( hash_id );'
elif sort_data == CC.SORT_FILES_BY_FILE_MODIFIED_TIMESTAMP:
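# a file's modified time here is taken as the earliest of its local file-system modified time and any per-domain (web) modified times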
q1 = 'SELECT hash_id, file_modified_timestamp FROM {temp_table} CROSS JOIN file_modified_timestamps USING ( hash_id )'
q2 = 'SELECT hash_id, file_modified_timestamp FROM {temp_table} CROSS JOIN file_domain_modified_timestamps USING ( hash_id )'
query = 'SELECT hash_id, MIN( file_modified_timestamp ) FROM ( {} UNION {} ) GROUP BY hash_id;'.format( q1, q2 )
elif sort_data == CC.SORT_FILES_BY_LAST_VIEWED_TIME:
query = 'SELECT hash_id, last_viewed_timestamp FROM {temp_table} CROSS JOIN file_viewing_stats USING ( hash_id ) WHERE canvas_type = {canvas_type};'.format( temp_table = '{temp_table}', canvas_type = CC.CANVAS_MEDIA_VIEWER )
elif sort_data == CC.SORT_FILES_BY_ARCHIVED_TIMESTAMP:
query = 'SELECT hash_id, archived_timestamp FROM {temp_table} CROSS JOIN archive_timestamps USING ( hash_id );'
if sort_data == CC.SORT_FILES_BY_IMPORT_TIME:
def key( row ):
hash_id = row[0]
timestamp = row[1]
# hash_id to differentiate files imported in the same second
return ( timestamp, hash_id )
elif sort_data == CC.SORT_FILES_BY_RATIO:
def key( row ):
width = row[1]
height = row[2]
if width is None or height is None:
return -1
else:
return width / height
elif sort_data == CC.SORT_FILES_BY_FRAMERATE:
def key( row ):
num_frames = row[1]
duration = row[2]
if num_frames is None or duration is None or num_frames == 0 or duration == 0:
return -1
else:
return num_frames / duration
elif sort_data == CC.SORT_FILES_BY_NUM_PIXELS:
def key( row ):
width = row[1]
height = row[2]
if width is None or height is None or width == 0 or height == 0:
return -1
else:
return width * height
elif sort_data == CC.SORT_FILES_BY_APPROX_BITRATE:
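# files with a duration rank by size/duration and then per-frame bitrate; files without one get a 0 primary value and fall back to size per pixel; genuinely unknown sizes sort to the bottom as -1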
def key( row ):
duration = row[1]
num_frames = row[2]
size = row[3]
width = row[4]
height = row[5]
if duration is None or duration == 0:
if size is None or size == 0:
duration_bitrate = -1
frame_bitrate = -1
else:
duration_bitrate = 0
if width is None or height is None:
frame_bitrate = 0
else:
if size is None or size == 0 or width is None or width == 0 or height is None or height == 0:
frame_bitrate = -1
else:
num_pixels = width * height
frame_bitrate = size / num_pixels
else:
if size is None or size == 0:
duration_bitrate = -1
frame_bitrate = -1
else:
duration_bitrate = size / duration
if num_frames is None or num_frames == 0:
frame_bitrate = 0
else:
frame_bitrate = duration_bitrate / num_frames
return ( duration_bitrate, frame_bitrate )
else:
key = lambda row: -1 if row[1] is None else row[1]
reverse = sort_order == CC.SORT_DESC
elif sort_data == CC.SORT_FILES_BY_RANDOM:
hash_ids = list( hash_ids )
random.shuffle( hash_ids )
did_sort = True
elif sort_data == CC.SORT_FILES_BY_HASH:
hash_ids_to_hashes = self.modules_hashes_local_cache.GetHashIdsToHashes( hash_ids = hash_ids )
hash_ids_to_hex_hashes = { hash_id : hash.hex() for ( hash_id, hash ) in hash_ids_to_hashes.items() }
hash_ids = sorted( hash_ids, key = lambda hash_id: hash_ids_to_hex_hashes[ hash_id ] )
reverse = sort_order == CC.SORT_DESC
if query is not None:
with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name:
hash_ids_and_other_data = sorted( self._Execute( query.format( temp_table = temp_hash_ids_table_name ) ), key = key, reverse = reverse )
original_hash_ids = set( hash_ids )
hash_ids = [ row[0] for row in hash_ids_and_other_data ]
# some stuff like media views won't have rows
missing_hash_ids = original_hash_ids.difference( hash_ids )
hash_ids.extend( missing_hash_ids )
did_sort = True
return ( did_sort, hash_ids )