1393 lines
59 KiB
Python
1393 lines
59 KiB
Python
import collections
|
|
import sqlite3
|
|
import typing
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
from hydrus.core import HydrusData
|
|
from hydrus.core import HydrusDB
|
|
from hydrus.core import HydrusDBBase
|
|
from hydrus.core import HydrusGlobals as HG
|
|
from hydrus.core import HydrusTags
|
|
|
|
from hydrus.client import ClientConstants as CC
|
|
from hydrus.client import ClientSearch
|
|
from hydrus.client.db import ClientDBMappingsCounts
|
|
from hydrus.client.db import ClientDBMappingsStorage
|
|
from hydrus.client.db import ClientDBMaster
|
|
from hydrus.client.db import ClientDBModule
|
|
from hydrus.client.db import ClientDBServices
|
|
from hydrus.client.db import ClientDBTagDisplay
|
|
from hydrus.client.db import ClientDBTagSiblings
|
|
from hydrus.client.metadata import ClientTags
|
|
|
|
# SQLite INTEGER columns hold signed 64-bit values: -( 2 ** 63 ) through ( 2 ** 63 ) - 1
MIN_CACHED_INTEGER = - ( 1 << 63 )
MAX_CACHED_INTEGER = ( 1 << 63 ) - 1

def CanCacheInteger( num ):
    """Return True if num lies within [ MIN_CACHED_INTEGER, MAX_CACHED_INTEGER ], inclusive."""
    
    return num >= MIN_CACHED_INTEGER and num <= MAX_CACHED_INTEGER
|
|
|
|
|
|
def ConvertWildcardToSQLiteLikeParameter( wildcard ):
    """Turn a '*' wildcard string into a SQLite LIKE pattern by swapping every '*' for '%'.
    
    No other character is touched.
    NOTE(review): a literal '%' or '_' already in the text is passed through unescaped, so LIKE will treat it as a metacharacter--confirm callers are ok with that.
    """
    
    pieces = wildcard.split( '*' )
    
    return '%'.join( pieces )
|
|
|
|
def GenerateCombinedFilesIntegerSubtagsTableName( tag_service_id ):
    """Return 'external_caches.combined_files_integer_subtags_cache_<tag_service_id>'."""
    
    return 'external_caches.{}_{}'.format( 'combined_files_integer_subtags_cache', tag_service_id )
|
|
|
|
def GenerateCombinedFilesSubtagsFTS4TableName( tag_service_id ):
    """Return 'external_caches.combined_files_subtags_fts4_cache_<tag_service_id>'."""
    
    return 'external_caches.{}_{}'.format( 'combined_files_subtags_fts4_cache', tag_service_id )
|
|
|
|
def GenerateCombinedFilesSubtagsSearchableMapTableName( tag_service_id ):
    """Return 'external_caches.combined_files_subtags_searchable_map_cache_<tag_service_id>'."""
    
    return 'external_caches.{}_{}'.format( 'combined_files_subtags_searchable_map_cache', tag_service_id )
|
|
|
|
def GenerateCombinedFilesTagsTableName( tag_service_id ):
    """Return 'external_caches.combined_files_tags_cache_<tag_service_id>'."""
    
    return 'external_caches.{}_{}'.format( 'combined_files_tags_cache', tag_service_id )
|
|
|
|
def GenerateCombinedTagsTagsTableName( file_service_id ):
    """Return 'external_caches.combined_tags_tags_cache_<file_service_id>'."""
    
    return 'external_caches.{}_{}'.format( 'combined_tags_tags_cache', file_service_id )
|
|
|
|
def GenerateSpecificIntegerSubtagsTableName( file_service_id, tag_service_id ):
    """Return 'external_caches.specific_integer_subtags_cache_<file_service_id>_<tag_service_id>'."""
    
    service_pair_suffix = '{}_{}'.format( file_service_id, tag_service_id )
    
    return 'external_caches.{}_{}'.format( 'specific_integer_subtags_cache', service_pair_suffix )
|
|
|
|
def GenerateSpecificSubtagsFTS4TableName( file_service_id, tag_service_id ):
    """Return 'external_caches.specific_subtags_fts4_cache_<file_service_id>_<tag_service_id>'."""
    
    service_pair_suffix = '{}_{}'.format( file_service_id, tag_service_id )
    
    return 'external_caches.{}_{}'.format( 'specific_subtags_fts4_cache', service_pair_suffix )
|
|
|
|
def GenerateSpecificSubtagsSearchableMapTableName( file_service_id, tag_service_id ):
    """Return 'external_caches.specific_subtags_searchable_map_cache_<file_service_id>_<tag_service_id>'."""
    
    service_pair_suffix = '{}_{}'.format( file_service_id, tag_service_id )
    
    return 'external_caches.{}_{}'.format( 'specific_subtags_searchable_map_cache', service_pair_suffix )
|
|
|
|
def GenerateSpecificTagsTableName( file_service_id, tag_service_id ):
    """Return 'external_caches.specific_tags_cache_<file_service_id>_<tag_service_id>'."""
    
    service_pair_suffix = '{}_{}'.format( file_service_id, tag_service_id )
    
    return 'external_caches.{}_{}'.format( 'specific_tags_cache', service_pair_suffix )
|
|
|
|
def WildcardHasFTS4SearchableCharacters( wildcard: str ):
    """Return True if any character of wildcard is '*', alphanumeric, or has a code point of 128 or above.
    
    An empty string gives False.
    """
    
    # fts4 says it can do alphanumeric or unicode with a value >= 128
    return any( c == '*' or c.isalnum() or ord( c ) >= 128 for c in wildcard )
|
|
|
|
class ClientDBTagSearch( ClientDBModule.ClientDBModule ):
|
|
|
|
CAN_REPOPULATE_ALL_MISSING_DATA = True
|
|
|
|
def __init__( self, cursor: sqlite3.Cursor, modules_services: ClientDBServices.ClientDBMasterServices, modules_tags: ClientDBMaster.ClientDBMasterTags, modules_tag_display: ClientDBTagDisplay.ClientDBTagDisplay, modules_tag_siblings: ClientDBTagSiblings.ClientDBTagSiblings, modules_mappings_counts: ClientDBMappingsCounts.ClientDBMappingsCounts ):
    """Store the sibling db modules this module calls into, then initialise the base module as 'client tag search'."""
    
    # the module references are stored before the base initialiser runs
    # NOTE(review): presumably the base initialiser can call back into methods that need them--confirm before reordering
    self.modules_services = modules_services
    self.modules_tags = modules_tags
    self.modules_tag_display = modules_tag_display
    self.modules_tag_siblings = modules_tag_siblings
    self.modules_mappings_counts = modules_mappings_counts
    
    ClientDBModule.ClientDBModule.__init__( self, 'client tag search', cursor )
    
    # ( file_service_id, tag_service_id ) pairs that _RepairRepopulateTables found to own a missing table
    self._missing_tag_search_service_pairs = set()
|
|
|
|
|
|
def _GetServiceIndexGenerationDictSingle( self, file_service_id, tag_service_id ) -> dict:
|
|
|
|
tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
|
|
subtags_fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, tag_service_id )
|
|
subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, tag_service_id )
|
|
integer_subtags_table_name = self.GetIntegerSubtagsTableName( file_service_id, tag_service_id )
|
|
|
|
index_generation_dict = {}
|
|
|
|
index_generation_dict[ tags_table_name ] = [
|
|
( [ 'namespace_id', 'subtag_id' ], True, 465 ),
|
|
( [ 'subtag_id' ], False, 465 )
|
|
]
|
|
|
|
index_generation_dict[ subtags_searchable_map_table_name ] = [
|
|
( [ 'searchable_subtag_id' ], False, 465 )
|
|
]
|
|
|
|
index_generation_dict[ integer_subtags_table_name ] = [
|
|
( [ 'integer_subtag' ], False, 465 )
|
|
]
|
|
|
|
return index_generation_dict
|
|
|
|
|
|
def _GetServiceIndexGenerationDict( self, service_id ) -> dict:
    """Merge the per-pair index descriptions for one tag service across its file services.
    
    service_id is used as the tag service id. The file services are those listed under
    HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES, followed by the combined file service.
    """
    
    services = self.modules_services
    
    all_file_service_ids = list( services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) )
    all_file_service_ids.append( services.combined_file_service_id )
    
    merged_index_info = {}
    
    for file_service_id in all_file_service_ids:
        
        merged_index_info.update( self._GetServiceIndexGenerationDictSingle( file_service_id, service_id ) )
        
    
    return merged_index_info
|
|
|
|
|
|
def _GetServiceTableGenerationDictSingle( self, file_service_id, tag_service_id ):
|
|
|
|
tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
|
|
subtags_fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, tag_service_id )
|
|
subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, tag_service_id )
|
|
integer_subtags_table_name = self.GetIntegerSubtagsTableName( file_service_id, tag_service_id )
|
|
|
|
table_dict = {
|
|
tags_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( tag_id INTEGER PRIMARY KEY, namespace_id INTEGER, subtag_id INTEGER );', 465 ),
|
|
subtags_fts4_table_name : ( 'CREATE VIRTUAL TABLE IF NOT EXISTS {} USING fts4( subtag );', 465 ),
|
|
subtags_searchable_map_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( subtag_id INTEGER PRIMARY KEY, searchable_subtag_id INTEGER );', 465 ),
|
|
integer_subtags_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( subtag_id INTEGER PRIMARY KEY, integer_subtag INTEGER );', 465 )
|
|
}
|
|
|
|
return table_dict
|
|
|
|
|
|
def _GetServiceTableGenerationDict( self, service_id ) -> dict:
    """Merge the per-pair table descriptions for one tag service across its file services.
    
    service_id is used as the tag service id. The file services are those listed under
    HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES, followed by the combined file service.
    """
    
    services = self.modules_services
    
    all_file_service_ids = list( services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) )
    all_file_service_ids.append( services.combined_file_service_id )
    
    merged_table_info = {}
    
    for file_service_id in all_file_service_ids:
        
        merged_table_info.update( self._GetServiceTableGenerationDictSingle( file_service_id, service_id ) )
        
    
    return merged_table_info
|
|
|
|
|
|
def _GetServiceIdsWeGenerateDynamicTablesFor( self ):
    """Return the ids of the services whose type is in HC.REAL_TAG_SERVICES."""
    
    real_tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
    
    return real_tag_service_ids
|
|
|
|
|
|
def _RepairRepopulateTables( self, table_names, cursor_transaction_wrapper: HydrusDBBase.DBCursorTransactionWrapper ):
    """Record every ( file_service_id, tag_service_id ) pair that owns at least one of table_names.
    
    Matching pairs are added to self._missing_tag_search_service_pairs; nothing is rebuilt here.
    cursor_transaction_wrapper is not used by this method.
    """
    
    services = self.modules_services
    
    # NOTE(review): this uses FILE_SERVICES_WITH_SPECIFIC_TAG_LOOKUP_CACHES where the generation dicts use FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES--confirm that is intended
    candidate_file_service_ids = list( services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_TAG_LOOKUP_CACHES ) )
    candidate_file_service_ids.append( services.combined_file_service_id )
    
    candidate_tag_service_ids = list( services.GetServiceIds( HC.REAL_TAG_SERVICES ) )
    
    for tag_service_id in candidate_tag_service_ids:
        
        for file_service_id in candidate_file_service_ids:
            
            owned_table_names = set( self._GetServiceTableGenerationDictSingle( file_service_id, tag_service_id ) )
            
            if owned_table_names.isdisjoint( table_names ):
                
                continue
                
            
            self._missing_tag_search_service_pairs.add( ( file_service_id, tag_service_id ) )
|
|
|
|
|
|
|
|
|
|
|
|
def AddTags( self, file_service_id, tag_service_id, tag_ids ):
    """Add tag_ids to the tag search caches of the given ( file service, tag service ) pair.
    
    Each tag id is INSERT OR IGNOREd into the tags cache table from the master tags table. For the
    ids that actually produced a new row, the subtag caches are filled in: the fts4 table always,
    the searchable map when the searchable form differs from the subtag, and the integer table when
    the subtag is decimal and CanCacheInteger accepts it. On the combined file service, the
    service_info row for HC.SERVICE_INFO_NUM_TAGS is incremented by the number of new tags.
    """
    
    if len( tag_ids ) == 0:
        
        return
        
    
    tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
    
    insert_tag_query = 'INSERT OR IGNORE INTO {} ( tag_id, namespace_id, subtag_id ) SELECT tag_id, namespace_id, subtag_id FROM tags WHERE tag_id = ?;'.format( tags_table_name )
    
    fresh_tag_ids = set()
    
    for tag_id in tag_ids:
        
        self._Execute( insert_tag_query, ( tag_id, ) )
        
        # only ids whose insert reported a changed row count as new
        if self._GetRowCount() > 0:
            
            fresh_tag_ids.add( tag_id )
            
        
    
    if len( fresh_tag_ids ) == 0:
        
        return
        
    
    if file_service_id == self.modules_services.combined_file_service_id:
        
        self._Execute( 'UPDATE service_info SET info = info + ? WHERE service_id = ? AND info_type = ?;', ( len( fresh_tag_ids ), tag_service_id, HC.SERVICE_INFO_NUM_TAGS ) )
        
    
    with self._MakeTemporaryIntegerTable( fresh_tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
        
        # temp tags to fast tag definitions to subtags
        subtag_rows = self._Execute( 'SELECT subtag_id, subtag FROM {} CROSS JOIN {} USING ( tag_id ) CROSS JOIN subtags USING ( subtag_id );'.format( temp_tag_ids_table_name, tags_table_name ) ).fetchall()
        
        fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, tag_service_id )
        searchable_map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, tag_service_id )
        integer_table_name = self.GetIntegerSubtagsTableName( file_service_id, tag_service_id )
        
        insert_map_query = 'INSERT OR IGNORE INTO {} ( subtag_id, searchable_subtag_id ) VALUES ( ?, ? );'.format( searchable_map_table_name )
        insert_fts4_query = 'INSERT OR IGNORE INTO {} ( docid, subtag ) VALUES ( ?, ? );'.format( fts4_table_name )
        insert_integer_query = 'INSERT OR IGNORE INTO {} ( subtag_id, integer_subtag ) VALUES ( ?, ? );'.format( integer_table_name )
        
        for ( subtag_id, subtag ) in subtag_rows:
            
            searchable_subtag = ClientSearch.ConvertSubtagToSearchable( subtag )
            
            if searchable_subtag != subtag:
                
                searchable_subtag_id = self.modules_tags.GetSubtagId( searchable_subtag )
                
                self._Execute( insert_map_query, ( subtag_id, searchable_subtag_id ) )
                
            
            # the fts4 table stores the searchable form, keyed by the real subtag id
            self._Execute( insert_fts4_query, ( subtag_id, searchable_subtag ) )
            
            if not subtag.isdecimal():
                
                continue
                
            
            try:
                
                integer_subtag = int( subtag )
                
                if CanCacheInteger( integer_subtag ):
                    
                    self._Execute( insert_integer_query, ( subtag_id, integer_subtag ) )
                    
                
            except ValueError:
                
                pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def DeleteTags( self, file_service_id, tag_service_id, tag_ids ):
    """Remove tag_ids from the tag search caches of the given ( file service, tag service ) pair.
    
    Tag ids that GetChainsMembers reports for the actual display type are kept. The rest are deleted
    from the tags cache table. If any row went, then on the combined file service the service_info
    row for HC.SERVICE_INFO_NUM_TAGS is decremented, and subtags no longer referenced by any
    remaining tag row are deleted from the fts4, searchable map and integer subtag tables.
    """
    
    if len( tag_ids ) == 0:
        
        return
        
    
    candidate_tag_ids = tag_ids if isinstance( tag_ids, set ) else set( tag_ids )
    
    # we always include all chained guys regardless of count
    chained_tag_ids = self.modules_tag_display.GetChainsMembers( ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, candidate_tag_ids )
    
    deletable_tag_ids = candidate_tag_ids.difference( chained_tag_ids )
    
    tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
    fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, tag_service_id )
    searchable_map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, tag_service_id )
    integer_table_name = self.GetIntegerSubtagsTableName( file_service_id, tag_service_id )
    
    with self._MakeTemporaryIntegerTable( deletable_tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
        
        # temp tag ids to tag definitions
        # this has to be read before the rows are deleted
        subtag_ids = self._STS( self._Execute( 'SELECT subtag_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_ids_table_name, tags_table_name ) ) )
        
        self._ExecuteMany( 'DELETE FROM {} WHERE tag_id = ?;'.format( tags_table_name ), ( ( tag_id, ) for tag_id in deletable_tag_ids ) )
        
        num_deleted = self._GetRowCount()
        
        if num_deleted <= 0:
            
            return
            
        
        if file_service_id == self.modules_services.combined_file_service_id:
            
            self._Execute( 'UPDATE service_info SET info = info - ? WHERE service_id = ? AND info_type = ?;', ( num_deleted, tag_service_id, HC.SERVICE_INFO_NUM_TAGS ) )
            
        
        # subtags may exist under other namespaces, so exclude those that do
        with self._MakeTemporaryIntegerTable( subtag_ids, 'subtag_id' ) as temp_subtag_ids_table_name:
            
            still_existing_subtag_ids = self._STS( self._Execute( 'SELECT subtag_id FROM {} CROSS JOIN {} USING ( subtag_id );'.format( temp_subtag_ids_table_name, tags_table_name ) ) )
            
        
        deletee_subtag_ids = subtag_ids.difference( still_existing_subtag_ids )
        
        subtag_delete_jobs = (
            ( 'DELETE FROM {} WHERE docid = ?;', fts4_table_name ),
            ( 'DELETE FROM {} WHERE subtag_id = ?;', searchable_map_table_name ),
            ( 'DELETE FROM {} WHERE subtag_id = ?;', integer_table_name )
        )
        
        for ( delete_query_without_name, table_name ) in subtag_delete_jobs:
            
            self._ExecuteMany( delete_query_without_name.format( table_name ), ( ( subtag_id, ) for subtag_id in deletee_subtag_ids ) )
|
|
|
|
|
|
|
|
|
|
def Drop( self, file_service_id, tag_service_id ):
    """Drop the four cache tables of the given ( file service, tag service ) pair, if they exist.
    
    Order is tags, subtags fts4, subtags searchable map, integer subtags.
    """
    
    table_name_getters = (
        self.GetTagsTableName,
        self.GetSubtagsFTS4TableName,
        self.GetSubtagsSearchableMapTableName,
        self.GetIntegerSubtagsTableName
    )
    
    for get_table_name in table_name_getters:
        
        table_name = get_table_name( file_service_id, tag_service_id )
        
        self._Execute( 'DROP TABLE IF EXISTS {};'.format( table_name ) )
|
|
|
|
|
|
def FilterExistingTagIds( self, file_service_id, tag_service_id, tag_ids_table_name ):
    """Return the tag ids from tag_ids_table_name that also have a row in this pair's tags cache table."""
    
    tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
    
    query = 'SELECT tag_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( tag_ids_table_name, tags_table_name )
    
    cursor = self._Execute( query )
    
    return self._STS( cursor )
|
|
|
|
|
|
def Generate( self, file_service_id, tag_service_id ):
    """Create the cache tables, and then their indices, for one ( file service, tag service ) pair."""
    
    table_info = self._GetServiceTableGenerationDictSingle( file_service_id, tag_service_id )
    
    for ( table_name, ( create_query_without_name, _version_added ) ) in table_info.items():
        
        self._CreateTable( create_query_without_name, table_name )
        
    
    # the index description is only fetched once all the tables exist
    index_info = self._GetServiceIndexGenerationDictSingle( file_service_id, tag_service_id )
    
    flattened_index_rows = self._FlattenIndexGenerationDict( index_info )
    
    for ( table_name, columns, unique, _version_added ) in flattened_index_rows:
        
        self._CreateIndex( table_name, columns, unique = unique )
|
|
|
|
|
|
|
|
def GetAllTagIds( self, leaf: ClientDBServices.FileSearchContextLeaf, job_key = None ):
    """Return every tag id in the tags cache table of the leaf's ( file service, tag service ) pair.
    
    The cursor is read through HydrusDB.ReadFromCancellableCursor, using job_key.IsCancelled as the
    cancel hook when a job_key is given. If the job is cancelled, an empty set is returned.
    """
    
    query = '{};'.format( self.GetQueryPhraseForTagIds( leaf.file_service_id, leaf.tag_service_id ) )
    
    cursor = self._Execute( query )
    
    cancelled_hook = None if job_key is None else job_key.IsCancelled
    
    fetched_tag_ids = self._STS( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook = cancelled_hook ) )
    
    if job_key is not None and job_key.IsCancelled():
        
        # a cancelled read may be partial, so nothing is returned
        return set()
        
    
    return set( fetched_tag_ids )
|
|
|
|
|
|
def GetAutocompletePredicates(
    self,
    tag_display_type: int,
    file_search_context: ClientSearch.FileSearchContext,
    search_text: str = '',
    exact_match = False,
    inclusive = True,
    search_namespaces_into_full_tags = False,
    zero_count_ok = False,
    job_key = None
):
    """Build the merged list of autocomplete predicates for search_text in the given search context.
    
    For each leaf of the file search context's branch, matching tag ids are fetched, counted in
    chunks of 1000, and turned into predicates, which are merged at the end with
    ClientSearch.MergePredicates. Returns [] straight away for the 'all known tags' plus
    'all known files' domain, and [] whenever job_key reports cancellation.
    """
    
    # TODO: So I think I should interleave this, perhaps with the SearchLeaf object, or just as GetHashIdsFromTag now does, for each tag service. don't throw 'all known tags' down to lower methods
    # _Then_, you do the GeneratePredicatesFromTagIdsAndCounts for each tag service in turn (don't worry, it is quick since services won't share tags much), and then you can do some clever sibling counting
    # For instance, if we search for A on a domain where one tag service has A->B, we return the B results. Well, let's increment the A (x) count according to that, based on each service!
    # and then obviously a nice big merge at the end
    
    def search_was_cancelled():
        
        return job_key is not None and job_key.IsCancelled()
        
    
    location_context = file_search_context.GetLocationContext()
    tag_context = file_search_context.GetTagContext()
    
    display_tag_service_id = self.modules_services.GetServiceId( tag_context.display_service_key )
    
    if tag_context.IsAllKnownTags() and location_context.IsAllKnownFiles():
        
        return []
        
    
    include_current = tag_context.include_current_tags
    include_pending = tag_context.include_pending_tags
    
    collected_predicates = []
    
    branch = self.modules_services.GetFileSearchContextBranch( file_search_context )
    
    for leaf in branch.IterateLeaves():
        
        tag_ids = self.GetAutocompleteTagIds( tag_display_type, leaf, search_text, exact_match, job_key = job_key )
        
        if ':' not in search_text and search_namespaces_into_full_tags and not exact_match:
            
            # 'char' -> 'character:samus aran'
            namespace_search_text = '{}*:*'.format( search_text )
            
            tag_ids.update( self.GetAutocompleteTagIds( tag_display_type, leaf, namespace_search_text, exact_match, job_key = job_key ) )
            
        
        if search_was_cancelled():
            
            return []
            
        
        domain_is_cross_referenced = leaf.file_service_id != self.modules_services.combined_deleted_file_service_id
        
        for group_of_tag_ids in HydrusData.SplitIteratorIntoChunks( tag_ids, 1000 ):
            
            if search_was_cancelled():
                
                return []
                
            
            ids_to_count = self.modules_mappings_counts.GetCounts( tag_display_type, leaf.tag_service_id, leaf.file_service_id, group_of_tag_ids, include_current, include_pending, domain_is_cross_referenced = domain_is_cross_referenced, zero_count_ok = zero_count_ok, job_key = job_key )
            
            if len( ids_to_count ) == 0:
                
                continue
                
            
            collected_predicates.extend( self.modules_tag_display.GeneratePredicatesFromTagIdsAndCounts( tag_display_type, display_tag_service_id, ids_to_count, inclusive, job_key = job_key ) )
            
        
        if search_was_cancelled():
            
            return []
            
        
    
    return ClientSearch.MergePredicates( collected_predicates )
|
|
|
|
|
|
def GetAutocompleteTagIds( self, tag_display_type: int, leaf: ClientDBServices.FileSearchContextLeaf, search_text, exact_match, job_key = None ):
    """Return the set of tag ids matching search_text in the leaf's domain, plus their sibling chain members.
    
    search_text is split into namespace and subtag with HydrusTags.SplitTag. An empty search text,
    an empty subtag, an exact match search containing '*', or a non-wildcard namespace that does not
    exist all give an empty set. A subtag of '*' fetches all tags (no namespace) or all tags of the
    matching namespaces; anything else goes through the subtag wildcard search. The result is then
    extended, in batches of 10240, with every member of the sibling chains the found tags belong to.
    If job_key reports cancellation during the sibling step, an empty set is returned.
    
    tag_display_type is not used by this method.
    """
    
    if search_text == '':
        
        return set()
        
    
    ( namespace, half_complete_searchable_subtag ) = HydrusTags.SplitTag( search_text )
    
    if half_complete_searchable_subtag == '':
        
        return set()
        
    
    if exact_match and ( '*' in namespace or '*' in half_complete_searchable_subtag ):
        
        # every other early exit returns a set, and callers may .update() the result, so this one returns a set too
        return set()
        
    
    if '*' in namespace:
        
        namespace_ids = self.GetNamespaceIdsFromWildcard( namespace )
        
    else:
        
        if not self.modules_tags.NamespaceExists( namespace ):
            
            return set()
            
        
        namespace_ids = ( self.modules_tags.GetNamespaceId( namespace ), )
        
    
    if half_complete_searchable_subtag == '*':
        
        if namespace == '':
            
            # hellmode 'get all tags' search
            
            tag_ids = self.GetAllTagIds( leaf, job_key = job_key )
            
        else:
            
            tag_ids = self.GetTagIdsFromNamespaceIds( leaf, namespace_ids, job_key = job_key )
            
        
    else:
        
        tag_ids = set()
        
        with self._MakeTemporaryIntegerTable( [], 'subtag_id' ) as temp_subtag_ids_table_name:
            
            self.GetSubtagIdsFromWildcardIntoTable( leaf.file_service_id, leaf.tag_service_id, half_complete_searchable_subtag, temp_subtag_ids_table_name, job_key = job_key )
            
            if namespace == '':
                
                loop_of_tag_ids = self.GetTagIdsFromSubtagIdsTable( leaf.file_service_id, leaf.tag_service_id, temp_subtag_ids_table_name, job_key = job_key )
                
            else:
                
                with self._MakeTemporaryIntegerTable( namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
                    
                    loop_of_tag_ids = self.GetTagIdsFromNamespaceIdsSubtagIdsTables( leaf.file_service_id, leaf.tag_service_id, temp_namespace_ids_table_name, temp_subtag_ids_table_name, job_key = job_key )
                    
                
            
            tag_ids.update( loop_of_tag_ids )
            
        
    
    # now fetch siblings, add to set
    
    if not isinstance( tag_ids, set ):
        
        tag_ids = set( tag_ids )
        
    
    # iterate a snapshot, since tag_ids grows inside the loop
    tag_ids_without_siblings = list( tag_ids )
    
    for batch_of_tag_ids in HydrusData.SplitListIntoChunks( tag_ids_without_siblings, 10240 ):
        
        with self._MakeTemporaryIntegerTable( batch_of_tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
            
            if job_key is not None and job_key.IsCancelled():
                
                return set()
                
            
            with self._MakeTemporaryIntegerTable( [], 'ideal_tag_id' ) as temp_ideal_tag_ids_table_name:
                
                self.modules_tag_siblings.FilterChainedIdealsIntoTable( ClientTags.TAG_DISPLAY_ACTUAL, leaf.tag_service_id, temp_tag_ids_table_name, temp_ideal_tag_ids_table_name )
                
                with self._MakeTemporaryIntegerTable( [], 'tag_id' ) as temp_chained_tag_ids_table_name:
                    
                    self.modules_tag_siblings.GetChainsMembersFromIdealsTables( ClientTags.TAG_DISPLAY_ACTUAL, leaf.tag_service_id, temp_ideal_tag_ids_table_name, temp_chained_tag_ids_table_name )
                    
                    tag_ids.update( self._STI( self._Execute( 'SELECT tag_id FROM {};'.format( temp_chained_tag_ids_table_name ) ) ) )
                    
                
            
        
    
    return tag_ids
|
|
|
|
|
|
def GetIntegerSubtagsTableName( self, file_service_id, tag_service_id ):
    """Return the integer subtags cache table name for the given ( file service, tag service ) pair.
    
    The combined file service gets the combined-files name. A file service that
    FileServiceIsCoveredByAllLocalFiles reports as covered uses the combined local file service's table.
    """
    
    services = self.modules_services
    
    if file_service_id == services.combined_file_service_id:
        
        return GenerateCombinedFilesIntegerSubtagsTableName( tag_service_id )
        
    
    cache_file_service_id = file_service_id
    
    if services.FileServiceIsCoveredByAllLocalFiles( file_service_id ):
        
        cache_file_service_id = services.combined_local_file_service_id
        
    
    return GenerateSpecificIntegerSubtagsTableName( cache_file_service_id, tag_service_id )
|
|
|
|
|
|
def GetMappingTables( self, tag_display_type, file_service_key: bytes, tag_context: ClientSearch.TagContext ):
    """Return just the mapping table names from GetMappingAndTagTables, in the same order."""
    
    mapping_table_names = []
    
    for ( mapping_table_name, _tag_table_name ) in self.GetMappingAndTagTables( tag_display_type, file_service_key, tag_context ):
        
        mapping_table_names.append( mapping_table_name )
        
    
    return mapping_table_names
|
|
|
|
|
|
def GetMappingAndTagTables( self, tag_display_type, file_service_key: bytes, tag_context: ClientSearch.TagContext ):
    """Return ( mappings table name, tags cache table name ) pairs for the given domain.
    
    The combined tag service key expands to every service in HC.REAL_TAG_SERVICES. On the combined
    file service the main mappings tables are used; otherwise the specific storage or display cache
    tables are used according to tag_display_type, and any other display type contributes nothing.
    Current-status pairs come first, then pending, each only if the tag context includes that status.
    """
    
    file_service_id = self.modules_services.GetServiceId( file_service_key )
    tag_service_key = tag_context.service_key
    
    if tag_service_key == CC.COMBINED_TAG_SERVICE_KEY:
        
        tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
        
    else:
        
        tag_service_ids = [ self.modules_services.GetServiceId( tag_service_key ) ]
        
    
    current_pairs = []
    pending_pairs = []
    
    for tag_service_id in tag_service_ids:
        
        tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
        
        if file_service_id == self.modules_services.combined_file_service_id:
            
            # yo this does not support ClientTags.TAG_DISPLAY_ACTUAL--big tricky problem
            
            ( current_name, _deleted_name, pending_name, _petitioned_name ) = ClientDBMappingsStorage.GenerateMappingsTableNames( tag_service_id )
            
        elif tag_display_type == ClientTags.TAG_DISPLAY_STORAGE:
            
            ( current_name, _deleted_name, pending_name ) = ClientDBMappingsStorage.GenerateSpecificMappingsCacheTableNames( file_service_id, tag_service_id )
            
        elif tag_display_type == ClientTags.TAG_DISPLAY_ACTUAL:
            
            ( current_name, pending_name ) = ClientDBMappingsStorage.GenerateSpecificDisplayMappingsCacheTableNames( file_service_id, tag_service_id )
            
        else:
            
            # no tables are known for any other display type on a specific file domain
            continue
            
        
        current_pairs.append( ( current_name, tags_table_name ) )
        pending_pairs.append( ( pending_name, tags_table_name ) )
        
    
    table_names = []
    
    if tag_context.include_current_tags:
        
        table_names.extend( current_pairs )
        
    
    if tag_context.include_pending_tags:
        
        table_names.extend( pending_pairs )
        
    
    return table_names
|
|
|
|
|
|
def GetMissingTagSearchServicePairs( self ):
    """Return the set of ( file_service_id, tag_service_id ) pairs recorded by _RepairRepopulateTables.
    
    This is the stored set itself, not a copy.
    """
    
    missing_pairs = self._missing_tag_search_service_pairs
    
    return missing_pairs
|
|
|
|
|
|
def GetNamespaceIdsFromWildcard( self, namespace_wildcard ):
    """Return the namespace ids matching namespace_wildcard.
    
    A bare '*' gives every namespace id. Text containing '*' is matched with LIKE. Text with no
    '*' gives a one-item list if that namespace exists, and an empty list if it does not.
    """
    
    if namespace_wildcard == '*':
        
        return self._STL( self._Execute( 'SELECT namespace_id FROM namespaces;' ) )
        
    
    if '*' in namespace_wildcard:
        
        like_param = ConvertWildcardToSQLiteLikeParameter( namespace_wildcard )
        
        return self._STL( self._Execute( 'SELECT namespace_id FROM namespaces WHERE namespace LIKE ?;', ( like_param, ) ) )
        
    
    # existence is checked first so the id lookup is only made for a namespace that is already known
    if not self.modules_tags.NamespaceExists( namespace_wildcard ):
        
        return []
        
    
    return [ self.modules_tags.GetNamespaceId( namespace_wildcard ) ]
|
|
|
|
|
|
|
|
|
|
def GetQueryPhraseForTagIds( self, file_service_id, tag_service_id ):
    """Return a 'SELECT tag_id FROM <tags cache table>' phrase for the given pair, with no trailing semicolon."""
    
    return 'SELECT tag_id FROM {}'.format( self.GetTagsTableName( file_service_id, tag_service_id ) )
|
|
|
|
|
|
def GetSubtagIdsFromWildcard( self, file_service_id: int, tag_service_id: int, subtag_wildcard, job_key = None ):
    """Return the set of subtag ids whose searchable text matches subtag_wildcard in the given domain.
    
    The combined tag service expands to every service in HC.REAL_TAG_SERVICES, and the results of
    each service are unioned. Per service:
    
    - '*' alone reads every docid from the fts4 table
    - a complex wildcard, or one with no fts4-searchable characters, is matched with LIKE against the
      fts4 table, narrowed by an fts4 prefix MATCH when the wildcard does not start with '*'
    - any other wildcard is a plain quoted fts4 MATCH
    - text with no '*' is looked up through the searchable subtag map, and the subtag's own id is included
    
    Returns an empty set as soon as job_key reports cancellation.
    """
    
    if tag_service_id == self.modules_services.combined_tag_service_id:
        
        search_tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
        
    else:
        
        search_tag_service_ids = ( tag_service_id, )
        
    
    result_subtag_ids = set()
    
    for search_tag_service_id in search_tag_service_ids:
        
        if '*' in subtag_wildcard:
            
            subtags_fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, search_tag_service_id )
            
            wildcard_has_fts4_searchable_characters = WildcardHasFTS4SearchableCharacters( subtag_wildcard )
            
            if subtag_wildcard == '*':
                
                # hellmode, but shouldn't be called normally
                cursor = self._Execute( 'SELECT docid FROM {};'.format( subtags_fts4_table_name ) )
                
            elif ClientSearch.IsComplexWildcard( subtag_wildcard ) or not wildcard_has_fts4_searchable_characters:
                
                # FTS4 does not support complex wildcards, so instead we'll search our raw subtags
                # however, since we want to search 'searchable' text, we use the 'searchable subtags map' to cross between real and searchable
                
                like_param = ConvertWildcardToSQLiteLikeParameter( subtag_wildcard )
                
                if subtag_wildcard.startswith( '*' ) or not wildcard_has_fts4_searchable_characters:
                    
                    # this is a SCAN, but there we go
                    # a potential optimisation here, in future, is to store fts4 of subtags reversed, then for '*amus', we can just search that reverse cache for 'suma*'
                    # and this would only double the size of the fts4 cache, the largest cache in the whole db! a steal!
                    # it also would not fix '*amu*', but with some cleverness could speed up '*amus ar*'
                    
                    query = 'SELECT docid FROM {} WHERE subtag LIKE ?;'.format( subtags_fts4_table_name )
                    
                    cursor = self._Execute( query, ( like_param, ) )
                    
                else:
                    
                    # we have an optimisation here--rather than searching all subtags for bl*ah, let's search all the bl* subtags for bl*ah!
                    
                    # everything before the first '*' becomes the fts4 prefix
                    prefix_fts4_wildcard = subtag_wildcard.split( '*' )[0]
                    
                    prefix_fts4_wildcard_param = '"{}*"'.format( prefix_fts4_wildcard )
                    
                    query = 'SELECT docid FROM {} WHERE subtag MATCH ? AND subtag LIKE ?;'.format( subtags_fts4_table_name )
                    
                    cursor = self._Execute( query, ( prefix_fts4_wildcard_param, like_param ) )
                    
                
            else:
                
                # we want the " " wrapping our search text to keep whitespace words connected and in order
                # "samus ar*" should not match "around samus"
                
                # simple 'sam*' style subtag, so we can search fts4 no prob
                
                subtags_fts4_param = '"{}"'.format( subtag_wildcard )
                
                cursor = self._Execute( 'SELECT docid FROM {} WHERE subtag MATCH ?;'.format( subtags_fts4_table_name ), ( subtags_fts4_param, ) )
                
            
            cancelled_hook = None
            
            if job_key is not None:
                
                cancelled_hook = job_key.IsCancelled
                
            
            loop_of_subtag_ids = self._STL( HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook = cancelled_hook ) )
            
        else:
            
            # old notes from before we had searchable subtag map. I deleted that map once, albeit in an older and less efficient form. *don't delete it again, it has use*
            #
            # NOTE: doing a subtag = 'blah' lookup on subtags_fts4 tables is ultra slow, lmao!
            # attempts to match '/a/' to 'a' with clever FTS4 MATCHing (i.e. a MATCH on a*\b, then an '= a') proved not super successful
            # in testing, it was still a bit slow. my guess is it is still iterating through all the nodes for ^a*, the \b just makes it a bit more efficient sometimes
            # in tests '^a\b' was about twice as fast as 'a*', so the \b might not even be helping at all
            # so, I decided to move back to a lean and upgraded searchable subtag map, and here we are
            
            subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, search_tag_service_id )
            
            searchable_subtag = subtag_wildcard
            
            if self.modules_tags.SubtagExists( searchable_subtag ):
                
                searchable_subtag_id = self.modules_tags.GetSubtagId( searchable_subtag )
                
                loop_of_subtag_ids = self._STS( self._Execute( 'SELECT subtag_id FROM {} WHERE searchable_subtag_id = ?;'.format( subtags_searchable_map_table_name ), ( searchable_subtag_id, ) ) )
                
                # the map only holds subtags that differ from their searchable form (see AddTags), so the subtag's own id is added by hand
                loop_of_subtag_ids.add( searchable_subtag_id )
                
            else:
                
                loop_of_subtag_ids = set()
                
            
        
        if job_key is not None and job_key.IsCancelled():
            
            return set()
            
        
        result_subtag_ids.update( loop_of_subtag_ids )
        
    
    return result_subtag_ids
|
|
|
|
|
|
def GetSubtagIdsFromWildcardIntoTable( self, file_service_id: int, tag_service_id: int, subtag_wildcard, subtag_id_table_name, job_key = None ):
    """
    Fill subtag_id_table_name with the subtag_ids that match subtag_wildcard.
    
    If tag_service_id is the combined tag service, every real tag service is searched in turn; otherwise only the
    given service is searched. Text containing '*' is searched through the FTS4 subtags table. Anything else is
    looked up as one exact subtag, plus whatever the searchable subtags map links to it.
    
    Returns nothing. If job_key is supplied and reports cancellation, subtag_id_table_name is emptied and the
    search stops.
    """
    
    services = self.modules_services
    
    if tag_service_id == services.combined_tag_service_id:
        search_tag_service_ids = services.GetServiceIds( HC.REAL_TAG_SERVICES )
    else:
        search_tag_service_ids = ( tag_service_id, )
    
    insert_statement = 'INSERT OR IGNORE INTO {} ( subtag_id ) VALUES ( ? );'.format( subtag_id_table_name )
    
    for search_tag_service_id in search_tag_service_ids:
        
        if '*' not in subtag_wildcard:
            
            # exact lookup. this goes through the searchable subtag map--*don't delete that map again, it has use*
            # old notes from before the map existed (it was deleted once, in an older and less efficient form):
            # - doing a subtag = 'blah' lookup on subtags_fts4 tables is ultra slow
            # - matching '/a/' to 'a' with clever FTS4 MATCHing (a MATCH on a*\b, then an '= a') was not super successful and still a bit slow in testing
            # - the guess is it still iterates through all the nodes for ^a*, with the \b only sometimes making it more efficient
            # - in tests '^a\b' was about twice as fast as 'a*', so the \b might not even be helping at all
            # hence the move back to a lean and upgraded searchable subtag map
            searchable_subtag = subtag_wildcard
            
            if self.modules_tags.SubtagExists( searchable_subtag ):
                
                searchable_subtag_id = self.modules_tags.GetSubtagId( searchable_subtag )
                
                # the subtag itself
                self._Execute( insert_statement, ( searchable_subtag_id, ) )
                
                # and every real subtag whose searchable form is this subtag
                map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, search_tag_service_id )
                
                map_insert_statement = 'INSERT OR IGNORE INTO {} ( subtag_id ) SELECT subtag_id FROM {} WHERE searchable_subtag_id = ?;'.format( subtag_id_table_name, map_table_name )
                
                self._Execute( map_insert_statement, ( searchable_subtag_id, ) )
            
        else:
            
            fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, search_tag_service_id )
            
            has_searchable_characters = WildcardHasFTS4SearchableCharacters( subtag_wildcard )
            
            # work out the query and its parameters first, then run it once below
            if subtag_wildcard == '*':
                
                # hellmode, but shouldn't be called normally
                execute_args = ( 'SELECT docid FROM {};'.format( fts4_table_name ), )
                
            elif ClientSearch.IsComplexWildcard( subtag_wildcard ) or not has_searchable_characters:
                
                # FTS4 does not support complex wildcards, so we LIKE against the text stored in the fts4 table instead
                # that text is the 'searchable' form; the 'searchable subtags map' is what crosses between real and searchable
                like_param = ConvertWildcardToSQLiteLikeParameter( subtag_wildcard )
                
                if subtag_wildcard.startswith( '*' ) or not has_searchable_characters:
                    
                    # this is a SCAN, but there we go
                    # a potential future optimisation: also store fts4 of the subtags reversed, so '*amus' becomes a search of that reverse cache for 'suma*'
                    # that would only double the size of the fts4 cache, the largest cache in the whole db! a steal!
                    # it would not fix '*amu*', but with some cleverness could speed up '*amus ar*'
                    execute_args = ( 'SELECT docid FROM {} WHERE subtag LIKE ?;'.format( fts4_table_name ), ( like_param, ) )
                    
                else:
                    
                    # optimisation: rather than searching all subtags for bl*ah, search just the bl* subtags for bl*ah
                    prefix = subtag_wildcard.partition( '*' )[0]
                    
                    prefix_match_param = '"{}*"'.format( prefix )
                    
                    execute_args = ( 'SELECT docid FROM {} WHERE subtag MATCH ? AND subtag LIKE ?;'.format( fts4_table_name ), ( prefix_match_param, like_param ) )
                
            else:
                
                # simple 'sam*' style subtag, so we can search fts4 no prob
                # the " " wrapping keeps whitespace-separated words connected and in order: "samus ar*" should not match "around samus"
                match_param = '"{}"'.format( subtag_wildcard )
                
                execute_args = ( 'SELECT docid FROM {} WHERE subtag MATCH ?;'.format( fts4_table_name ), ( match_param, ) )
            
            cursor = self._Execute( *execute_args )
            
            cancelled_hook = None if job_key is None else job_key.IsCancelled
            
            subtag_id_rows = HydrusDB.ReadFromCancellableCursor( cursor, 1024, cancelled_hook = cancelled_hook )
            
            self._ExecuteMany( insert_statement, subtag_id_rows )
        
        if job_key is not None and job_key.IsCancelled():
            
            # a cancelled search leaves nothing behind
            self._Execute( 'DELETE FROM {};'.format( subtag_id_table_name ) )
            
            return
|
|
|
|
|
|
|
|
|
|
def GetSubtagsFTS4TableName( self, file_service_id, tag_service_id ):
    """
    Return the name of the FTS4 subtags table for this file/tag service pair.
    
    The combined file service has its own table per tag service. Any other file service gets a 'specific' table.
    """
    
    services = self.modules_services
    
    if file_service_id == services.combined_file_service_id:
        return GenerateCombinedFilesSubtagsFTS4TableName( tag_service_id )
    
    # a file service covered by 'all local files' is looked up under the combined local file service id instead
    if services.FileServiceIsCoveredByAllLocalFiles( file_service_id ):
        file_service_id = services.combined_local_file_service_id
    
    return GenerateSpecificSubtagsFTS4TableName( file_service_id, tag_service_id )
|
|
|
|
|
|
def GetSubtagsSearchableMapTableName( self, file_service_id, tag_service_id ):
    """
    Return the name of the searchable subtags map table for this file/tag service pair.
    
    The combined file service has its own table per tag service. Any other file service gets a 'specific' table.
    """
    
    services = self.modules_services
    
    if file_service_id == services.combined_file_service_id:
        return GenerateCombinedFilesSubtagsSearchableMapTableName( tag_service_id )
    
    # a file service covered by 'all local files' is looked up under the combined local file service id instead
    if services.FileServiceIsCoveredByAllLocalFiles( file_service_id ):
        file_service_id = services.combined_local_file_service_id
    
    return GenerateSpecificSubtagsSearchableMapTableName( file_service_id, tag_service_id )
|
|
|
|
|
|
def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
    """
    List the ( table name, column name ) pairs in this module that hold definition ids for content_type.
    
    Only HC.CONTENT_TYPE_TAG produces anything. For every real tag service and every file domain (the file
    services with specific mapping caches, plus the combined file service) the list holds the tags table with
    its 'tag_id' column and the FTS4 subtags table with its 'docid' column. Any other content type gives an
    empty list.
    """
    
    tables_and_columns = []
    
    if content_type != HC.CONTENT_TYPE_TAG:
        return tables_and_columns
    
    tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
    
    # the file domains do not depend on the tag service, so they are worked out once rather than once per tag service
    file_service_ids = list( self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) )
    file_service_ids.append( self.modules_services.combined_file_service_id )
    
    for tag_service_id in tag_service_ids:
        
        for file_service_id in file_service_ids:
            
            tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
            subtags_fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, tag_service_id )
            
            tables_and_columns.append( ( tags_table_name, 'tag_id' ) )
            tables_and_columns.append( ( subtags_fts4_table_name, 'docid' ) )
    
    return tables_and_columns
|
|
|
|
|
|
def GetTagAsNumSubtagIds( self, file_service_id, tag_service_id, operator, num ):
    """
    Return the subtag_ids whose cached integer value satisfies '<integer_subtag> <operator> <num>'.
    
    The lookup runs against the integer subtags table for this file/tag service pair.
    """
    
    integer_subtags_table_name = self.GetIntegerSubtagsTableName( file_service_id, tag_service_id )
    
    # NOTE(review): operator and num are formatted straight into the SQL text, so callers should only pass trusted values
    query = 'SELECT subtag_id FROM {} WHERE integer_subtag {} {};'.format( integer_subtags_table_name, operator, num )
    
    cursor = self._Execute( query )
    
    return self._STS( cursor )
|
|
|
|
|
|
def GetTagCount( self, file_service_id, tag_service_id ):
    """
    Return the number of rows in the tags table for this file/tag service pair.
    """
    
    count_query = 'SELECT COUNT( * ) FROM {};'.format( self.GetTagsTableName( file_service_id, tag_service_id ) )
    
    row = self._Execute( count_query ).fetchone()
    
    ( count, ) = row
    
    return count
|
|
|
|
|
|
def GetTagIdsFromNamespaceIds( self, leaf: ClientDBServices.FileSearchContextLeaf, namespace_ids: typing.Collection[ int ], job_key = None ):
    """
    Return the set of tag_ids in the leaf's tags table that carry any of the given namespace_ids.
    
    No namespace_ids gives an empty set. If job_key is supplied and reports cancellation once the rows
    have been read, an empty set is returned instead of the partial results.
    """
    
    if len( namespace_ids ) == 0:
        return set()
    
    with self._MakeTemporaryIntegerTable( namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
        
        tags_table_name = self.GetTagsTableName( leaf.file_service_id, leaf.tag_service_id )
        
        if len( namespace_ids ) == 1:
            
            # a single namespace can be filtered on directly, no join needed
            ( namespace_id, ) = namespace_ids
            
            cursor = self._Execute( 'SELECT tag_id FROM {} WHERE namespace_id = ?;'.format( tags_table_name ), ( namespace_id, ) )
            
        else:
            
            # temp namespaces to tags
            join_query = 'SELECT tag_id FROM {} CROSS JOIN {} USING ( namespace_id );'.format( temp_namespace_ids_table_name, tags_table_name )
            
            cursor = self._Execute( join_query )
        
        cancelled_hook = None if job_key is None else job_key.IsCancelled
        
        tag_id_rows = HydrusDB.ReadFromCancellableCursor( cursor, 128, cancelled_hook = cancelled_hook )
        
        found_tag_ids = self._STS( tag_id_rows )
        
        if job_key is not None and job_key.IsCancelled():
            return set()
        
        return set( found_tag_ids )
|
|
|
|
|
|
def GetTagIdsFromNamespaceIdsSubtagIds( self, file_service_id: int, tag_service_id: int, namespace_ids: typing.Collection[ int ], subtag_ids: typing.Collection[ int ], job_key = None ):
    """
    Return the tag_ids that pair one of namespace_ids with one of subtag_ids.
    
    Both id collections are loaded into temporary integer tables, and the actual search is handed to
    GetTagIdsFromNamespaceIdsSubtagIdsTables. If either collection is empty, the result is an empty set.
    """
    
    nothing_to_search = len( namespace_ids ) == 0 or len( subtag_ids ) == 0
    
    if nothing_to_search:
        return set()
    
    # subtag table is made first, then the namespace table, the same order as entering them one inside the other
    with self._MakeTemporaryIntegerTable( subtag_ids, 'subtag_id' ) as temp_subtag_ids_table_name, self._MakeTemporaryIntegerTable( namespace_ids, 'namespace_id' ) as temp_namespace_ids_table_name:
        
        tag_ids = self.GetTagIdsFromNamespaceIdsSubtagIdsTables( file_service_id, tag_service_id, temp_namespace_ids_table_name, temp_subtag_ids_table_name, job_key = job_key )
        
        return tag_ids
|
|
|
|
|
|
|
|
|
|
def GetTagIdsFromNamespaceIdsSubtagIdsTables( self, file_service_id: int, tag_service_id: int, namespace_ids_table_name: str, subtag_ids_table_name: str, job_key = None ):
    """
    Return the tag_ids whose subtag_id is in subtag_ids_table_name and whose namespace_id is in namespace_ids_table_name.
    
    If tag_service_id is the combined tag service, every real tag service's tags table is searched and the
    results are pooled. If job_key is supplied and reports cancellation after a read, an empty set is
    returned instead of the partial results.
    """
    
    services = self.modules_services
    
    if tag_service_id == services.combined_tag_service_id:
        search_tag_service_ids = services.GetServiceIds( HC.REAL_TAG_SERVICES )
    else:
        search_tag_service_ids = ( tag_service_id, )
    
    collected_tag_ids = set()
    
    for search_tag_service_id in search_tag_service_ids:
        
        tags_table_name = self.GetTagsTableName( file_service_id, search_tag_service_id )
        
        # temp subtags to tags to temp namespaces
        join_query = 'SELECT tag_id FROM {} CROSS JOIN {} USING ( subtag_id ) CROSS JOIN {} USING ( namespace_id );'.format( subtag_ids_table_name, tags_table_name, namespace_ids_table_name )
        
        cursor = self._Execute( join_query )
        
        cancelled_hook = None if job_key is None else job_key.IsCancelled
        
        tag_id_rows = HydrusDB.ReadFromCancellableCursor( cursor, 128, cancelled_hook = cancelled_hook )
        
        service_tag_ids = self._STS( tag_id_rows )
        
        if job_key is not None and job_key.IsCancelled():
            return set()
        
        collected_tag_ids.update( service_tag_ids )
    
    return collected_tag_ids
|
|
|
|
|
|
def GetTagIdsFromSubtagIds( self, file_service_id: int, tag_service_id: int, subtag_ids: typing.Collection[ int ], job_key = None ):
    """
    Return the tag_ids that use any of the given subtag_ids.
    
    The subtag_ids are loaded into a temporary integer table, and the actual search is handed to
    GetTagIdsFromSubtagIdsTable. No subtag_ids gives an empty set.
    """
    
    if len( subtag_ids ) == 0:
        return set()
    
    with self._MakeTemporaryIntegerTable( subtag_ids, 'subtag_id' ) as temp_subtag_ids_table_name:
        
        tag_ids = self.GetTagIdsFromSubtagIdsTable( file_service_id, tag_service_id, temp_subtag_ids_table_name, job_key = job_key )
        
        return tag_ids
|
|
|
|
|
|
|
|
def GetTagIdsFromSubtagIdsTable( self, file_service_id: int, tag_service_id: int, subtag_ids_table_name: str, job_key = None ):
    """
    Return the tag_ids whose subtag_id appears in subtag_ids_table_name.
    
    If tag_service_id is the combined tag service, every real tag service's tags table is searched and the
    results are pooled. If job_key is supplied and reports cancellation after a read, an empty set is
    returned instead of the partial results.
    """
    
    services = self.modules_services
    
    if tag_service_id == services.combined_tag_service_id:
        search_tag_service_ids = services.GetServiceIds( HC.REAL_TAG_SERVICES )
    else:
        search_tag_service_ids = ( tag_service_id, )
    
    collected_tag_ids = set()
    
    for search_tag_service_id in search_tag_service_ids:
        
        tags_table_name = self.GetTagsTableName( file_service_id, search_tag_service_id )
        
        # temp subtags to tags
        join_query = 'SELECT tag_id FROM {} CROSS JOIN {} USING ( subtag_id );'.format( subtag_ids_table_name, tags_table_name )
        
        cursor = self._Execute( join_query )
        
        cancelled_hook = None if job_key is None else job_key.IsCancelled
        
        tag_id_rows = HydrusDB.ReadFromCancellableCursor( cursor, 128, cancelled_hook = cancelled_hook )
        
        service_tag_ids = self._STS( tag_id_rows )
        
        if job_key is not None and job_key.IsCancelled():
            return set()
        
        collected_tag_ids.update( service_tag_ids )
    
    return collected_tag_ids
|
|
|
|
|
|
def GetTagsTableName( self, file_service_id, tag_service_id ):
    """
    Return the name of the tags table for this file/tag service pair.
    
    The combined file service has its own table per tag service. Any other file service gets a 'specific' table.
    """
    
    services = self.modules_services
    
    if file_service_id == services.combined_file_service_id:
        return GenerateCombinedFilesTagsTableName( tag_service_id )
    
    # a file service covered by 'all local files' is looked up under the combined local file service id instead
    if services.FileServiceIsCoveredByAllLocalFiles( file_service_id ):
        file_service_id = services.combined_local_file_service_id
    
    return GenerateSpecificTagsTableName( file_service_id, tag_service_id )
|
|
|
|
|
|
def HasTag( self, file_service_id, tag_service_id, tag_id ):
    """
    Return True if tag_id has a row in the tags table for this file/tag service pair, otherwise False.
    """
    
    existence_query = 'SELECT 1 FROM {} WHERE tag_id = ?;'.format( self.GetTagsTableName( file_service_id, tag_service_id ) )
    
    row = self._Execute( existence_query, ( tag_id, ) ).fetchone()
    
    if row is None:
        return False
    
    return True
|
|
|
|
|
|
def RegenerateSearchableSubtagMap( self, file_service_id, tag_service_id, status_hook = None ):
    """
    Rebuild the searchable subtags map for this file/tag service pair from scratch.
    
    The map table is emptied, then every docid in the FTS4 subtags table is walked in chunks. A subtag whose
    searchable form differs from its real text gets a ( subtag_id, searchable_subtag_id ) row. After each chunk
    a progress message goes to the splash status and, if given, to status_hook.
    """
    
    subtags_fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, tag_service_id )
    subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, tag_service_id )
    
    self._Execute( 'DELETE FROM {};'.format( subtags_searchable_map_table_name ) )
    
    all_subtag_ids_query = 'SELECT docid FROM {};'.format( subtags_fts4_table_name )
    
    map_insert_statement = 'INSERT OR IGNORE INTO {} ( subtag_id, searchable_subtag_id ) VALUES ( ?, ? );'.format( subtags_searchable_map_table_name )
    
    BLOCK_SIZE = 10000
    
    for ( group_of_subtag_ids, num_done, num_to_do ) in HydrusDB.ReadLargeIdQueryInSeparateChunks( self._c, all_subtag_ids_query, BLOCK_SIZE ):
        
        for subtag_id in group_of_subtag_ids:
            
            row = self._Execute( 'SELECT subtag FROM subtags WHERE subtag_id = ?;', ( subtag_id, ) ).fetchone()
            
            if row is None:
                
                # no text is stored for this id, so there is nothing to map
                continue
            
            ( subtag, ) = row
            
            searchable_subtag = ClientSearch.ConvertSubtagToSearchable( subtag )
            
            if searchable_subtag == subtag:
                
                # only subtags that differ from their searchable form get a map row
                continue
            
            searchable_subtag_id = self.modules_tags.GetSubtagId( searchable_subtag )
            
            self._Execute( map_insert_statement, ( subtag_id, searchable_subtag_id ) )
        
        progress_message = HydrusData.ConvertValueRangeToPrettyString( num_done, num_to_do )
        
        HG.client_controller.frame_splash_status.SetSubtext( progress_message )
        
        if status_hook is not None:
            status_hook( progress_message )
|
|
|
|
|
|
|
|
|
|
def RepopulateMissingSubtags( self, file_service_id, tag_service_id ):
    """
    Re-add any subtags that the tags table uses but the FTS4 subtags table lacks.
    
    Each missing subtag is written to the FTS4 table in its searchable form. It also gets a searchable map
    row when that form differs from the real text, and an integer cache row when the text is a decimal
    number that fits the cacheable range. If anything was missing, a summary is shown via
    HydrusData.ShowText.
    """
    
    tags_table_name = self.GetTagsTableName( file_service_id, tag_service_id )
    subtags_fts4_table_name = self.GetSubtagsFTS4TableName( file_service_id, tag_service_id )
    subtags_searchable_map_table_name = self.GetSubtagsSearchableMapTableName( file_service_id, tag_service_id )
    integer_subtags_table_name = self.GetIntegerSubtagsTableName( file_service_id, tag_service_id )
    
    missing_query = 'SELECT subtag_id FROM {} EXCEPT SELECT docid FROM {};'.format( tags_table_name, subtags_fts4_table_name )
    
    missing_subtag_ids = self._STS( self._Execute( missing_query ) )
    
    map_insert_statement = 'INSERT OR IGNORE INTO {} ( subtag_id, searchable_subtag_id ) VALUES ( ?, ? );'.format( subtags_searchable_map_table_name )
    fts4_insert_statement = 'INSERT OR IGNORE INTO {} ( docid, subtag ) VALUES ( ?, ? );'.format( subtags_fts4_table_name )
    integer_insert_statement = 'INSERT OR IGNORE INTO {} ( subtag_id, integer_subtag ) VALUES ( ?, ? );'.format( integer_subtags_table_name )
    
    for subtag_id in missing_subtag_ids:
        
        row = self._Execute( 'SELECT subtag FROM subtags WHERE subtag_id = ?;', ( subtag_id, ) ).fetchone()
        
        if row is None:
            
            # no text is stored for this id, so there is nothing to repopulate
            continue
        
        ( subtag, ) = row
        
        searchable_subtag = ClientSearch.ConvertSubtagToSearchable( subtag )
        
        if searchable_subtag != subtag:
            
            searchable_subtag_id = self.modules_tags.GetSubtagId( searchable_subtag )
            
            self._Execute( map_insert_statement, ( subtag_id, searchable_subtag_id ) )
        
        #
        
        # the fts4 table stores the searchable text under the real subtag's id
        self._Execute( fts4_insert_statement, ( subtag_id, searchable_subtag ) )
        
        if not subtag.isdecimal():
            continue
        
        try:
            
            integer_subtag = int( subtag )
            
            if CanCacheInteger( integer_subtag ):
                self._Execute( integer_insert_statement, ( subtag_id, integer_subtag ) )
            
        except ValueError:
            
            # best effort: a decimal-looking subtag that will not convert is simply not cached
            pass
    
    num_missing = len( missing_subtag_ids )
    
    if num_missing > 0:
        HydrusData.ShowText( 'Repopulated {} missing subtags for {}_{}.'.format( HydrusData.ToHumanInt( num_missing ), file_service_id, tag_service_id ) )
|
|
|
|
|
|
|